From: Eric Anholt Date: Mon, 2 May 2011 16:45:40 +0000 (-0700) Subject: i965: Start adding the VS visitor and codegen. X-Git-Tag: mesa-8.0-rc1~2728^2~233 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=af3c9803d818fd33139f1247a387d64b967b8992;p=platform%2Fupstream%2Fmesa.git i965: Start adding the VS visitor and codegen. The low-level IR is a mashup of brw_fs.cpp and ir_to_mesa.cpp. It's currently controlled by the INTEL_NEW_VS=1 environment variable, and only tested for the trivial "gl_Position = gl_Vertex;" shader so far. --- diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 44f28cd..45a5350 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -124,7 +124,10 @@ CXX_SOURCES = \ brw_fs_reg_allocate.cpp \ brw_fs_schedule_instructions.cpp \ brw_fs_vector_splitting.cpp \ - brw_shader.cpp + brw_shader.cpp \ + brw_vec4_emit.cpp \ + brw_vec4_reg_allocate.cpp \ + brw_vec4_visitor.cpp ASM_SOURCES = diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index cc11d06..7b6b64c 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -529,7 +529,7 @@ struct brw_context * the CURBE, the depth buffer, and a query BO. 
*/ drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16]; - int validated_bo_count; + unsigned int validated_bo_count; } state; struct brw_cache cache; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index da8d016..e3823c6 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -638,6 +638,8 @@ enum opcode { FS_OPCODE_SPILL, FS_OPCODE_UNSPILL, FS_OPCODE_PULL_CONSTANT_LOAD, + + VS_OPCODE_URB_WRITE, }; #define BRW_PREDICATE_NONE 0 diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 72d50ea..38dd99b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -44,6 +44,9 @@ #define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) #define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) #define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1) +#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2) +#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3) #define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 285ba46..7367cca 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -146,38 +146,7 @@ void fs_visitor::generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src) { - int op; - - switch (inst->opcode) { - case SHADER_OPCODE_RCP: - op = BRW_MATH_FUNCTION_INV; - break; - case SHADER_OPCODE_RSQ: - op = BRW_MATH_FUNCTION_RSQ; - break; - case SHADER_OPCODE_SQRT: - op = BRW_MATH_FUNCTION_SQRT; - break; - case SHADER_OPCODE_EXP2: - op = BRW_MATH_FUNCTION_EXP; - break; - case SHADER_OPCODE_LOG2: - op = BRW_MATH_FUNCTION_LOG; - break; - case SHADER_OPCODE_POW: - op = BRW_MATH_FUNCTION_POW; - break; - case SHADER_OPCODE_SIN: - op = BRW_MATH_FUNCTION_SIN; - break; - case SHADER_OPCODE_COS: - op = BRW_MATH_FUNCTION_COS; - break; - default: - 
assert(!"not reached: unknown math function"); - op = 0; - break; - } + int op = brw_math_function(inst->opcode); if (intel->gen >= 6) { assert(inst->mlen == 0); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index f4005f8..2eeeec2 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -199,3 +199,29 @@ brw_conditional_for_comparison(unsigned int op) return BRW_CONDITIONAL_NZ; } } + +uint32_t +brw_math_function(enum opcode op) +{ + switch (op) { + case SHADER_OPCODE_RCP: + return BRW_MATH_FUNCTION_INV; + case SHADER_OPCODE_RSQ: + return BRW_MATH_FUNCTION_RSQ; + case SHADER_OPCODE_SQRT: + return BRW_MATH_FUNCTION_SQRT; + case SHADER_OPCODE_EXP2: + return BRW_MATH_FUNCTION_EXP; + case SHADER_OPCODE_LOG2: + return BRW_MATH_FUNCTION_LOG; + case SHADER_OPCODE_POW: + return BRW_MATH_FUNCTION_POW; + case SHADER_OPCODE_SIN: + return BRW_MATH_FUNCTION_SIN; + case SHADER_OPCODE_COS: + return BRW_MATH_FUNCTION_COS; + default: + assert(!"not reached: unknown math function"); + return 0; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 21671d1..1054d7a 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -22,8 +22,10 @@ */ #include +#include "brw_defines.h" #pragma once int brw_type_for_base_type(const struct glsl_type *type); uint32_t brw_conditional_for_comparison(unsigned int op); +uint32_t brw_math_function(enum opcode op); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h new file mode 100644 index 0000000..10168fc --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -0,0 +1,434 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, 
including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BRW_VEC4_H +#define BRW_VEC4_H + +#include +#include "brw_shader.h" +#include "main/compiler.h" +#include "program/hash_table.h" + +extern "C" { +#include "brw_vs.h" +#include "brw_context.h" +#include "brw_eu.h" +}; + +#include "../glsl/ir.h" + +namespace brw { + +class dst_reg; + +/** + * Common helper for constructing swizzles. When only a subset of + * channels of a vec4 are used, we don't want to reference the other + * channels, as that will tell optimization passes that those other + * channels are used. 
+ */
+static int
+swizzle_for_size(int size)
+{
+   /* Indexed by (size - 1).  Channels past the last used one repeat that
+    * last channel instead of naming a component the value doesn't have.
+    */
+   static const int size_swizzles[4] = {
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
+   };
+
+   assert((size >= 1) && (size <= 4));
+   return size_swizzles[size - 1];
+}
+
+enum register_file {
+   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
+   GRF = BRW_GENERAL_REGISTER_FILE,
+   MRF = BRW_MESSAGE_REGISTER_FILE,
+   IMM = BRW_IMMEDIATE_VALUE,
+   HW_REG, /* a struct brw_reg */
+   ATTR,
+   UNIFORM, /* prog_data->params[hw_reg] */
+   BAD_FILE
+};
+
+/**
+ * Fields shared by source and destination operands of the vec4 IR.
+ */
+class reg
+{
+public:
+   /** Register file: ARF, GRF, MRF, IMM. */
+   enum register_file file;
+   /** virtual register number.  0 = fixed hw reg */
+   int reg;
+   /** Offset within the virtual register. */
+   int reg_offset;
+   /** Register type.  BRW_REGISTER_TYPE_* */
+   int type;
+   bool sechalf;
+   struct brw_reg fixed_hw_reg;
+   int smear; /* -1, or a channel of the reg to smear to all channels. */
+
+   /** Value for file == IMM; the member to read is selected by 'type'. */
+   union {
+      int32_t i;
+      uint32_t u;
+      float f;
+   } imm;
+};
+
+/**
+ * Source operand: a reg plus swizzle and negate/abs source modifiers.
+ */
+class src_reg : public reg
+{
+public:
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = ralloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   /** Zeroes every field and marks the register as unset (BAD_FILE). */
+   void init()
+   {
+      memset(this, 0, sizeof(*this));
+
+      this->file = BAD_FILE;
+   }
+
+   /**
+    * Register 'reg' of 'file'.  The swizzle only names the channels that
+    * 'type' actually has; without a scalar/vector/matrix type all four
+    * channels are selected.
+    */
+   src_reg(register_file file, int reg, const glsl_type *type)
+   {
+      init();
+
+      this->file = file;
+      this->reg = reg;
+      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
+         this->swizzle = swizzle_for_size(type->vector_elements);
+      else
+         /* Keep the BRW_SWIZZLE4 packing used by swizzle_for_size(): this
+          * value is later copied verbatim into brw_reg.dw1.bits.swizzle,
+          * and Mesa's SWIZZLE_XYZW is packed differently.
+          */
+         this->swizzle = BRW_SWIZZLE_XYZW;
+   }
+
+   /** Generic unset register constructor. */
+   src_reg()
+   {
+      init();
+   }
+
+   /** Float immediate. */
+   src_reg(float f)
+   {
+      init();
+
+      this->file = IMM;
+      this->type = BRW_REGISTER_TYPE_F;
+      this->imm.f = f;
+   }
+
+   /** Unsigned integer immediate. */
+   src_reg(uint32_t u)
+   {
+      init();
+
+      this->file = IMM;
+      this->type = BRW_REGISTER_TYPE_UD;
+      /* Must go through the unsigned member: codegen reads imm.u for UD
+       * immediates, so storing via imm.f would emit the float bit pattern.
+       */
+      this->imm.u = u;
+   }
+
+   /** Signed integer immediate. */
+   src_reg(int32_t i)
+   {
+      init();
+
+      this->file = IMM;
+      this->type = BRW_REGISTER_TYPE_D;
+      this->imm.i = i;
+   }
+
+   src_reg(class vec4_visitor *v, const struct glsl_type *type);
+
+   explicit src_reg(dst_reg reg);
+
+   GLuint swizzle; /**< BRW_SWIZZLE4()-packed channel selection. */
+   bool negate;
+   bool abs;
+};
+
+/**
+ * Destination operand: a reg plus a channel writemask.
+ */
+class dst_reg : public reg
+{
+public:
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = ralloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   /** Zeroes every field, marks the register unset, enables all channels. */
+   void init()
+   {
+      memset(this, 0, sizeof(*this));
+      this->file = BAD_FILE;
+      this->writemask = WRITEMASK_XYZW;
+   }
+
+   /** Unset destination (BAD_FILE). */
+   dst_reg()
+   {
+      init();
+   }
+
+   /** Register 'reg' of 'file', writing all four channels. */
+   dst_reg(register_file file, int reg)
+   {
+      init();
+
+      this->file = file;
+      this->reg = reg;
+   }
+
+   /** Wraps an already-built hardware register. */
+   dst_reg(struct brw_reg reg)
+   {
+      init();
+
+      this->file = HW_REG;
+      this->fixed_hw_reg = reg;
+   }
+
+   dst_reg(class vec4_visitor *v, const struct glsl_type *type);
+
+   explicit dst_reg(src_reg reg);
+
+   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
+};
+
+class vec4_instruction : public exec_node {
+public:
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors).
*/ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = rzalloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + struct brw_reg get_dst(void); + struct brw_reg get_src(int i); + + enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ + dst_reg dst; + src_reg src[3]; + + bool saturate; + bool predicate_inverse; + uint32_t predicate; + + int conditional_mod; /**< BRW_CONDITIONAL_* */ + + int sampler; + int target; /**< MRT target. */ + bool shadow_compare; + + bool eot; + bool header_present; + int mlen; /**< SEND message length */ + int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ + + uint32_t offset; /* spill/unspill offset */ + /** @{ + * Annotation for the generated IR. One of the two can be set. + */ + ir_instruction *ir; + const char *annotation; +}; + +class vec4_visitor : public ir_visitor +{ +public: + vec4_visitor(struct brw_vs_compile *c, + struct gl_shader_program *prog, struct brw_shader *shader); + ~vec4_visitor(); + + dst_reg dst_null_f() + { + return dst_reg(brw_null_reg()); + } + + dst_reg dst_null_d() + { + return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + } + + dst_reg dst_null_cmp() + { + if (intel->gen > 4) + return dst_null_d(); + else + return dst_null_f(); + } + + struct brw_context *brw; + const struct gl_vertex_program *vp; + struct intel_context *intel; + struct gl_context *ctx; + struct brw_vs_compile *c; + struct brw_vs_prog_data *prog_data; + struct brw_compile *p; + struct brw_shader *shader; + struct gl_shader_program *prog; + void *mem_ctx; + exec_list instructions; + + char *fail_msg; + bool failed; + + /** + * GLSL IR currently being processed, which is associated with our + * driver IR instructions for debugging purposes. 
+ */ + ir_instruction *base_ir; + const char *current_annotation; + + int *virtual_grf_sizes; + int virtual_grf_count; + int virtual_grf_array_size; + int first_non_payload_grf; + + dst_reg *variable_storage(ir_variable *var); + + void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); + + src_reg src_reg_for_float(float val); + + /** + * \name Visit methods + * + * As typical for the visitor pattern, there must be one \c visit method for + * each concrete subclass of \c ir_instruction. Virtual base classes within + * the hierarchy should not have \c visit methods. + */ + /*@{*/ + virtual void visit(ir_variable *); + virtual void visit(ir_loop *); + virtual void visit(ir_loop_jump *); + virtual void visit(ir_function_signature *); + virtual void visit(ir_function *); + virtual void visit(ir_expression *); + virtual void visit(ir_swizzle *); + virtual void visit(ir_dereference_variable *); + virtual void visit(ir_dereference_array *); + virtual void visit(ir_dereference_record *); + virtual void visit(ir_assignment *); + virtual void visit(ir_constant *); + virtual void visit(ir_call *); + virtual void visit(ir_return *); + virtual void visit(ir_discard *); + virtual void visit(ir_texture *); + virtual void visit(ir_if *); + /*@}*/ + + src_reg result; + + /* Regs for vertex results. Generated at ir_variable visiting time + * for the ir->location's used. 
+ */ + dst_reg output_reg[VERT_RESULT_MAX]; + + struct hash_table *variable_ht; + + bool run(void); + void fail(const char *msg, ...); + + int virtual_grf_alloc(int size); + int setup_attributes(int payload_reg); + void setup_payload(); + void reg_allocate_trivial(); + void reg_allocate(); + + vec4_instruction *emit(enum opcode opcode); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1, src_reg src2); + + /** Walks an exec_list of ir_instruction and sends it through this visitor. */ + void visit_instructions(const exec_list *list); + + void emit_bool_to_cond_code(ir_rvalue *ir); + void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1); + void emit_if_gen6(ir_if *ir); + + void emit_block_move(ir_assignment *ir); + + /** + * Emit the correct dot-product instruction for the type of arguments + */ + void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements); + + void emit_scalar(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, src_reg src0); + + void emit_scalar(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, src_reg src0, src_reg src1); + + void emit_scs(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, const src_reg &src); + + void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + + int emit_vue_header_gen6(int header_mrf); + int emit_vue_header_gen4(int header_mrf); + void emit_urb_writes(void); + + GLboolean 
try_emit_sat(ir_expression *ir); + + bool process_move_condition(ir_rvalue *ir); + + void generate_code(); + void generate_vs_instruction(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg *src); + void generate_math1_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src); + void generate_math1_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src); + void generate_urb_write(vec4_instruction *inst); +}; + +} /* namespace brw */ + +#endif /* BRW_VEC4_H */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp new file mode 100644 index 0000000..bdc7a79 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -0,0 +1,568 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "brw_vec4.h" +#include "../glsl/ir_print_visitor.h" + +extern "C" { +#include "brw_eu.h" +}; + +using namespace brw; + +namespace brw { + +int +vec4_visitor::setup_attributes(int payload_reg) +{ + int nr_attributes; + int attribute_map[VERT_ATTRIB_MAX]; + + nr_attributes = 0; + for (int i = 0; i < VERT_ATTRIB_MAX; i++) { + if (prog_data->inputs_read & BITFIELD64_BIT(i)) { + attribute_map[i] = payload_reg + nr_attributes; + nr_attributes++; + } + } + + foreach_iter(exec_list_iterator, iter, this->instructions) { + vec4_instruction *inst = (vec4_instruction *)iter.get(); + + for (int i = 0; i < 3; i++) { + if (inst->src[i].file != ATTR) + continue; + + inst->src[i].file = HW_REG; + inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0); + inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle; + } + } + + /* The BSpec says we always have to read at least one thing from + * the VF, and it appears that the hardware wedges otherwise. + */ + if (nr_attributes == 0) + nr_attributes = 1; + + prog_data->urb_read_length = (nr_attributes + 1) / 2; + + return nr_attributes; +} + +void +vec4_visitor::setup_payload(void) +{ + int reg = 0; + + /* r0 is always reserved, as it contains the payload with the URB + * handles that are passed on to the URB write at the end of the + * thread. + */ + reg++; + + /* User clip planes from curbe: + */ + if (c->key.nr_userclip) { + if (intel->gen >= 6) { + for (int i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride(brw_vec4_grf(reg + i / 2, + (i % 2) * 4), 0, 4, 1); + } + reg += ALIGN(c->key.nr_userclip, 2) / 2; + } else { + for (int i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2, + (i % 2) * 4), 0, 4, 1); + } + reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2; + } + } + + /* FINISHME: push constants */ + c->prog_data.curb_read_length = reg - 1; + c->prog_data.nr_params = 0; + /* XXX 0 causes a bug elsewhere... 
*/ + if (intel->gen < 6 && c->prog_data.nr_params == 0) + c->prog_data.nr_params = 4; + + reg += setup_attributes(reg); + + this->first_non_payload_grf = reg; +} + +struct brw_reg +vec4_instruction::get_dst(void) +{ + struct brw_reg brw_reg; + + switch (dst.file) { + case GRF: + assert(dst.reg_offset == 0); + brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); + brw_reg = retype(brw_reg, dst.type); + brw_reg.dw1.bits.writemask = dst.writemask; + break; + + case HW_REG: + brw_reg = dst.fixed_hw_reg; + break; + + case BAD_FILE: + brw_reg = brw_null_reg(); + break; + + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + return brw_reg; +} + +struct brw_reg +vec4_instruction::get_src(int i) +{ + struct brw_reg brw_reg; + + switch (src[i].file) { + case GRF: + brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0); + brw_reg = retype(brw_reg, src[i].type); + brw_reg.dw1.bits.swizzle = src[i].swizzle; + if (src[i].abs) + brw_reg = brw_abs(brw_reg); + if (src[i].negate) + brw_reg = negate(brw_reg); + break; + + case IMM: + switch (src[i].type) { + case BRW_REGISTER_TYPE_F: + brw_reg = brw_imm_f(src[i].imm.f); + break; + case BRW_REGISTER_TYPE_D: + brw_reg = brw_imm_d(src[i].imm.i); + break; + case BRW_REGISTER_TYPE_UD: + brw_reg = brw_imm_ud(src[i].imm.u); + break; + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + break; + + case HW_REG: + brw_reg = src[i].fixed_hw_reg; + break; + + case BAD_FILE: + /* Probably unused. 
*/ + brw_reg = brw_null_reg(); + break; + case ATTR: + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + + return brw_reg; +} + +void +vec4_visitor::generate_math1_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math(p, + dst, + brw_math_function(inst->opcode), + BRW_MATH_SATURATE_NONE, + inst->base_mrf, + src, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); +} + +void +vec4_visitor::generate_math1_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math(p, + dst, + brw_math_function(inst->opcode), + BRW_MATH_SATURATE_NONE, + inst->base_mrf, + src, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); +} + +void +vec4_visitor::generate_urb_write(vec4_instruction *inst) +{ + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + inst->base_mrf, /* starting mrf reg nr */ + brw_vec8_grf(0, 0), /* src */ + false, /* allocate */ + true, /* used */ + inst->mlen, + 0, /* response len */ + inst->eot, /* eot */ + inst->eot, /* writes complete */ + inst->offset, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); +} + +void +vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, + struct brw_reg dst, + struct brw_reg *src) +{ + vec4_instruction *inst = (vec4_instruction *)instruction; + + switch (inst->opcode) { + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: + if (intel->gen >= 6) { + generate_math1_gen6(inst, dst, src[0]); + } else { + generate_math1_gen4(inst, dst, src[0]); + } + break; + + case SHADER_OPCODE_POW: + assert(!"finishme"); + break; + + case VS_OPCODE_URB_WRITE: + generate_urb_write(inst); + break; + + default: + if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { + fail("unsupported opcode in `%s' in VS\n", + brw_opcodes[inst->opcode].name); + } else { + fail("Unsupported opcode %d in VS", inst->opcode); + } + } +} + 
+bool +vec4_visitor::run() +{ + /* Generate FS IR for main(). (the visitor only descends into + * functions called "main"). + */ + foreach_iter(exec_list_iterator, iter, *shader->ir) { + ir_instruction *ir = (ir_instruction *)iter.get(); + base_ir = ir; + ir->accept(this); + } + + emit_urb_writes(); + + if (failed) + return false; + + setup_payload(); + reg_allocate(); + + brw_set_access_mode(p, BRW_ALIGN_16); + + generate_code(); + + return !failed; +} + +void +vec4_visitor::generate_code() +{ + int last_native_inst = p->nr_insn; + const char *last_annotation_string = NULL; + ir_instruction *last_annotation_ir = NULL; + + int loop_stack_array_size = 16; + int loop_stack_depth = 0; + brw_instruction **loop_stack = + rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); + int *if_depth_in_loop = + rzalloc_array(this->mem_ctx, int, loop_stack_array_size); + + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + printf("Native code for vertex shader %d:\n", prog->Name); + } + + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + struct brw_reg src[3], dst; + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + if (last_annotation_ir != inst->ir) { + last_annotation_ir = inst->ir; + if (last_annotation_ir) { + printf(" "); + last_annotation_ir->print(); + printf("\n"); + } + } + if (last_annotation_string != inst->annotation) { + last_annotation_string = inst->annotation; + if (last_annotation_string) + printf(" %s\n", last_annotation_string); + } + } + + for (unsigned int i = 0; i < 3; i++) { + src[i] = inst->get_src(i); + } + dst = inst->get_dst(); + + brw_set_conditionalmod(p, inst->conditional_mod); + brw_set_predicate_control(p, inst->predicate); + brw_set_predicate_inverse(p, inst->predicate_inverse); + brw_set_saturate(p, inst->saturate); + + switch (inst->opcode) { + case BRW_OPCODE_MOV: + brw_MOV(p, dst, src[0]); + break; + case BRW_OPCODE_ADD: + brw_ADD(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_MUL: + 
brw_MUL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_FRC: + brw_FRC(p, dst, src[0]); + break; + case BRW_OPCODE_RNDD: + brw_RNDD(p, dst, src[0]); + break; + case BRW_OPCODE_RNDE: + brw_RNDE(p, dst, src[0]); + break; + case BRW_OPCODE_RNDZ: + brw_RNDZ(p, dst, src[0]); + break; + + case BRW_OPCODE_AND: + brw_AND(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_OR: + brw_OR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_XOR: + brw_XOR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_NOT: + brw_NOT(p, dst, src[0]); + break; + case BRW_OPCODE_ASR: + brw_ASR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_SHR: + brw_SHR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_SHL: + brw_SHL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_CMP: + brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); + break; + case BRW_OPCODE_SEL: + brw_SEL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_IF: + if (inst->src[0].file != BAD_FILE) { + /* The instruction has an embedded compare (only allowed on gen6) */ + assert(intel->gen == 6); + gen6_IF(p, inst->conditional_mod, src[0], src[1]); + } else { + brw_IF(p, BRW_EXECUTE_8); + } + if_depth_in_loop[loop_stack_depth]++; + break; + + case BRW_OPCODE_ELSE: + brw_ELSE(p); + break; + case BRW_OPCODE_ENDIF: + brw_ENDIF(p); + if_depth_in_loop[loop_stack_depth]--; + break; + + case BRW_OPCODE_DO: + loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); + if (loop_stack_array_size <= loop_stack_depth) { + loop_stack_array_size *= 2; + loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, + loop_stack_array_size); + if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, + loop_stack_array_size); + } + if_depth_in_loop[loop_stack_depth] = 0; + break; + + case BRW_OPCODE_BREAK: + brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case BRW_OPCODE_CONTINUE: + /* FINISHME: We need to write the loop instruction support still. 
*/ + if (intel->gen >= 6) + gen6_CONT(p, loop_stack[loop_stack_depth - 1]); + else + brw_CONT(p, if_depth_in_loop[loop_stack_depth]); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + + case BRW_OPCODE_WHILE: { + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + if (intel->gen >= 5) + br = 2; + + assert(loop_stack_depth > 0); + loop_stack_depth--; + inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_stack[loop_stack_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } + } + } + } + break; + + default: + generate_vs_instruction(inst, dst, src); + break; + } + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { + if (0) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + } + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + + last_native_inst = p->nr_insn; + } + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + printf("\n"); + } + + ralloc_free(loop_stack); + ralloc_free(if_depth_in_loop); + + brw_set_uip_jip(p); + + /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS + * emit issues, it doesn't get the jump distances into the output, + * which is often something we want to debug. So this is here in + * case you're doing that. 
+ */ + if (0) { + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + for (unsigned int i = 0; i < p->nr_insn; i++) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + } +} + +extern "C" { + +bool +brw_vs_emit(struct brw_vs_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; + struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram; + + if (!prog) + return false; + + struct brw_shader *shader = + (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; + if (!shader) + return false; + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + printf("GLSL IR for native vertex shader %d:\n", prog->Name); + _mesa_print_ir(shader->ir, NULL); + printf("\n\n"); + } + + vec4_visitor v(c, prog, shader); + if (!v.run()) { + /* FINISHME: Cleanly fail, test at link time, etc. 
*/ + assert(!"not reached"); + return false; + } + + return true; +} + +} /* extern "C" */ + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp new file mode 100644 index 0000000..e7f6b28 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -0,0 +1,77 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_vec4.h" +#include "../glsl/ir_print_visitor.h" + +using namespace brw; + +namespace brw { + +static void +assign(int *reg_hw_locations, reg *reg) +{ + if (reg->file == GRF) { + reg->reg = reg_hw_locations[reg->reg]; + } +} + +void +vec4_visitor::reg_allocate_trivial() +{ + int last_grf = 0; + int hw_reg_mapping[this->virtual_grf_count]; + int i; + int next; + + /* Note that compressed instructions require alignment to 2 registers. 
*/ + hw_reg_mapping[0] = this->first_non_payload_grf; + next = hw_reg_mapping[0] + this->virtual_grf_sizes[0]; + for (i = 1; i < this->virtual_grf_count; i++) { + hw_reg_mapping[i] = next; + next += this->virtual_grf_sizes[i]; + } + prog_data->total_grf = next; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + vec4_instruction *inst = (vec4_instruction *)iter.get(); + + assign(hw_reg_mapping, &inst->dst); + assign(hw_reg_mapping, &inst->src[0]); + assign(hw_reg_mapping, &inst->src[1]); + assign(hw_reg_mapping, &inst->src[2]); + } + + if (last_grf >= BRW_MAX_GRF) { + fail("Ran out of regs on trivial allocator (%d/%d)\n", + last_grf, BRW_MAX_GRF); + } +} + +void +vec4_visitor::reg_allocate() +{ + reg_allocate_trivial(); +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp new file mode 100644 index 0000000..bba1d81 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -0,0 +1,1649 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_vec4.h" +#include "main/macros.h" + +namespace brw { + +src_reg::src_reg(dst_reg reg) +{ + init(); + + this->file = reg.file; + this->reg = reg.reg; + this->reg_offset = reg.reg_offset; + this->type = reg.type; + + int swizzles[4]; + int next_chan = 0; + int last = 0; + + for (int i = 0; i < 4; i++) { + if (!(reg.writemask & (1 << i))) + continue; + + swizzles[next_chan++] = last = i; + } + + for (; next_chan < 4; next_chan++) { + swizzles[next_chan] = last; + } + + this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); +} + +dst_reg::dst_reg(src_reg reg) +{ + init(); + + this->file = reg.file; + this->reg = reg.reg; + this->reg_offset = reg.reg_offset; + this->type = reg.type; + this->writemask = WRITEMASK_XYZW; +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1, src_reg src2) +{ + vec4_instruction *inst = new(mem_ctx) vec4_instruction(); + + inst->opcode = opcode; + inst->dst = dst; + inst->src[0] = src0; + inst->src[1] = src1; + inst->src[2] = src2; + inst->ir = this->base_ir; + inst->annotation = this->current_annotation; + + this->instructions.push_tail(inst); + + return inst; +} + + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1) +{ + return emit(opcode, dst, src0, src1, src_reg()); +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0) +{ + assert(dst.writemask != 0); + return emit(opcode, dst, src0, src_reg(), src_reg()); +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode) +{ + return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg()); +} + +void 
+vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
+{
+   static enum opcode dot_opcodes[] = {
+      BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
+   };
+
+   /* The table is indexed by (elements - 2), so only 2..4 are valid. */
+   assert(elements >= 2 && elements <= 4);
+
+   emit(dot_opcodes[elements - 2], dst, src0, src1);
+}
+
+/**
+ * Emits a one-operand math instruction for gen6+.
+ *
+ * The operand is first copied into a freshly allocated temporary and the
+ * math instruction reads that temporary instead of the caller's source.
+ */
+void
+vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
+{
+   /* The gen6 math instruction ignores the source modifiers --
+    * swizzle, abs, negate, and at least some parts of the register
+    * region description.  Move the source to a temporary to make it
+    * generally work.
+    *
+    * The MOV has to target the temporary: writing it to dst would leave
+    * the temporary uninitialized when the math instruction reads it.  The
+    * temporary is a vec4, as in emit_math2_gen6, so that every channel of
+    * the operand is carried over.
+    */
+   src_reg expanded = src_reg(this, glsl_type::vec4_type);
+   emit(BRW_OPCODE_MOV, dst_reg(expanded), src);
+   src = expanded;
+
+   emit(opcode, dst, src);
+}
+
+/**
+ * Emits a one-operand math instruction for pre-gen6 hardware.
+ *
+ * The instruction is tagged with base_mrf 1 and mlen 1 (presumably one
+ * message register starting at m1 for the operand -- confirm against the
+ * code generator).
+ */
+void
+vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
+{
+   vec4_instruction *inst = emit(opcode, dst, src);
+   inst->base_mrf = 1;
+   inst->mlen = 1;
+}
+
+/**
+ * Emits a one-operand math operation, dispatching on hardware generation.
+ *
+ * Only the single-source SHADER_OPCODE_* math opcodes listed in the switch
+ * are accepted; anything else asserts and emits nothing.
+ */
+void
+vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
+{
+   switch (opcode) {
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
+      break;
+   default:
+      assert(!"not reached: bad math opcode");
+      return;
+   }
+
+   if (intel->gen >= 6) {
+      return emit_math1_gen6(opcode, dst, src);
+   } else {
+      return emit_math1_gen4(opcode, dst, src);
+   }
+}
+
+void
+vec4_visitor::emit_math2_gen6(enum opcode opcode,
+                              dst_reg dst, src_reg src0, src_reg src1)
+{
+   src_reg expanded;
+
+   /* The gen6 math instruction ignores the source modifiers --
+    * swizzle, abs, negate, and at least some parts of the register
+    * region description.  Move the sources to temporaries to make it
+    * generally work.
+ */ + + expanded = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst, src0); + src0 = expanded; + + expanded = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst, src1); + src1 = expanded; + + emit(opcode, dst, src0, src1); +} + +void +vec4_visitor::emit_math2_gen4(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + vec4_instruction *inst = emit(opcode, dst, src0, src1); + inst->base_mrf = 1; + inst->mlen = 2; +} + +void +vec4_visitor::emit_math(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + assert(opcode == SHADER_OPCODE_POW); + + if (intel->gen >= 6) { + return emit_math2_gen6(opcode, dst, src0, src1); + } else { + return emit_math2_gen4(opcode, dst, src0, src1); + } +} + +void +vec4_visitor::visit_instructions(const exec_list *list) +{ + foreach_iter(exec_list_iterator, iter, *list) { + ir_instruction *ir = (ir_instruction *)iter.get(); + + base_ir = ir; + ir->accept(this); + } +} + + +static int +type_size(const struct glsl_type *type) +{ + unsigned int i; + int size; + + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (type->is_matrix()) { + return type->matrix_columns; + } else { + /* Regardless of size of vector, it gets a vec4. This is bad + * packing for things like floats, but otherwise arrays become a + * mess. Hopefully a later pass over the code can pack scalars + * down if appropriate. + */ + return 1; + } + case GLSL_TYPE_ARRAY: + assert(type->length > 0); + return type_size(type->fields.array) * type->length; + case GLSL_TYPE_STRUCT: + size = 0; + for (i = 0; i < type->length; i++) { + size += type_size(type->fields.structure[i].type); + } + return size; + case GLSL_TYPE_SAMPLER: + /* Samplers take up one slot in UNIFORMS[], but they're baked in + * at link time. 
+ */ + return 1; + default: + assert(0); + return 0; + } +} + +int +vec4_visitor::virtual_grf_alloc(int size) +{ + if (virtual_grf_array_size <= virtual_grf_count) { + if (virtual_grf_array_size == 0) + virtual_grf_array_size = 16; + else + virtual_grf_array_size *= 2; + virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, + virtual_grf_array_size); + } + virtual_grf_sizes[virtual_grf_count] = size; + return virtual_grf_count++; +} + +src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) +{ + init(); + + this->file = GRF; + this->reg = v->virtual_grf_alloc(type_size(type)); + + if (type->is_array() || type->is_record()) { + this->swizzle = BRW_SWIZZLE_NOOP; + } else { + this->swizzle = swizzle_for_size(type->vector_elements); + } + + this->type = brw_type_for_base_type(type); +} + +dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) +{ + init(); + + this->file = GRF; + this->reg = v->virtual_grf_alloc(type_size(type)); + + if (type->is_array() || type->is_record()) { + this->writemask = WRITEMASK_XYZW; + } else { + this->writemask = (1 << type->vector_elements) - 1; + } + + this->type = brw_type_for_base_type(type); +} + +dst_reg * +vec4_visitor::variable_storage(ir_variable *var) +{ + return (dst_reg *)hash_table_find(this->variable_ht, var); +} + +void +vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir) +{ + ir_expression *expr = ir->as_expression(); + + if (expr) { + src_reg op[2]; + vec4_instruction *inst; + + assert(expr->get_num_operands() <= 2); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + assert(expr->operands[i]->type->is_scalar()); + + expr->operands[i]->accept(this); + op[i] = this->result; + } + + switch (expr->operation) { + case ir_unop_logic_not: + inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1)); + inst->conditional_mod = BRW_CONDITIONAL_Z; + break; + + case ir_binop_logic_xor: + inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]); + inst->conditional_mod = 
BRW_CONDITIONAL_NZ; + break; + + case ir_binop_logic_or: + inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_logic_and: + inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_unop_f2b: + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f)); + } else { + inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_unop_i2b: + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + } else { + inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_all_equal: + case ir_binop_nequal: + case ir_binop_any_nequal: + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); + break; + + default: + assert(!"not reached"); + break; + } + return; + } + + ir->accept(this); + + if (intel->gen >= 6) { + vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(), + this->result, src_reg(1)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } else { + vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } +} + +/** + * Emit a gen6 IF statement with the comparison folded into the IF + * instruction. 
+ */ +void +vec4_visitor::emit_if_gen6(ir_if *ir) +{ + ir_expression *expr = ir->condition->as_expression(); + + if (expr) { + src_reg op[2]; + vec4_instruction *inst; + dst_reg temp; + + assert(expr->get_num_operands() <= 2); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + assert(expr->operands[i]->type->is_scalar()); + + expr->operands[i]->accept(this); + op[i] = this->result; + } + + switch (expr->operation) { + case ir_unop_logic_not: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_Z; + return; + + case ir_binop_logic_xor: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_logic_or: + temp = dst_reg(this, glsl_type::bool_type); + emit(BRW_OPCODE_OR, temp, op[0], op[1]); + inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_logic_and: + temp = dst_reg(this, glsl_type::bool_type); + emit(BRW_OPCODE_AND, temp, op[0], op[1]); + inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_unop_f2b: + inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_unop_i2b: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_all_equal: + case ir_binop_nequal: + case ir_binop_any_nequal: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); + return; + default: + assert(!"not reached"); + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + } + 
return; + } + + ir->condition->accept(this); + + vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(), + this->result, src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; +} + +void +vec4_visitor::visit(ir_variable *ir) +{ + dst_reg *reg = NULL; + + if (variable_storage(ir)) + return; + + switch (ir->mode) { + case ir_var_in: + reg = new(mem_ctx) dst_reg(ATTR, ir->location); + reg->type = brw_type_for_base_type(ir->type); + hash_table_insert(this->variable_ht, reg, ir); + break; + + case ir_var_out: + reg = new(mem_ctx) dst_reg(this, ir->type); + hash_table_insert(this->variable_ht, reg, ir); + + for (int i = 0; i < type_size(ir->type); i++) { + output_reg[ir->location + i] = *reg; + output_reg[ir->location + i].reg_offset = i; + } + break; + + case ir_var_temporary: + reg = new(mem_ctx) dst_reg(this, ir->type); + hash_table_insert(this->variable_ht, reg, ir); + + break; + + case ir_var_uniform: + /* FINISHME: uniforms */ + break; + } +} + +void +vec4_visitor::visit(ir_loop *ir) +{ + ir_dereference_variable *counter = NULL; + + /* We don't want debugging output to print the whole body of the + * loop as the annotation. 
+ */ + this->base_ir = NULL; + + if (ir->counter != NULL) + counter = new(ir) ir_dereference_variable(ir->counter); + + if (ir->from != NULL) { + assert(ir->counter != NULL); + + ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); + + a->accept(this); + delete a; + } + + emit(BRW_OPCODE_DO); + + if (ir->to) { + ir_expression *e = + new(ir) ir_expression(ir->cmp, glsl_type::bool_type, + counter, ir->to); + ir_if *if_stmt = new(ir) ir_if(e); + + ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); + + if_stmt->then_instructions.push_tail(brk); + + if_stmt->accept(this); + + delete if_stmt; + delete e; + delete brk; + } + + visit_instructions(&ir->body_instructions); + + if (ir->increment) { + ir_expression *e = + new(ir) ir_expression(ir_binop_add, counter->type, + counter, ir->increment); + + ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); + + a->accept(this); + delete a; + delete e; + } + + emit(BRW_OPCODE_WHILE); +} + +void +vec4_visitor::visit(ir_loop_jump *ir) +{ + switch (ir->mode) { + case ir_loop_jump::jump_break: + emit(BRW_OPCODE_BREAK); + break; + case ir_loop_jump::jump_continue: + emit(BRW_OPCODE_CONTINUE); + break; + } +} + + +void +vec4_visitor::visit(ir_function_signature *ir) +{ + assert(0); + (void)ir; +} + +void +vec4_visitor::visit(ir_function *ir) +{ + /* Ignore function bodies other than main() -- we shouldn't see calls to + * them since they should all be inlined. 
+ */ + if (strcmp(ir->name, "main") == 0) { + const ir_function_signature *sig; + exec_list empty; + + sig = ir->matching_signature(&empty); + + assert(sig); + + visit_instructions(&sig->body); + } +} + +GLboolean +vec4_visitor::try_emit_sat(ir_expression *ir) +{ + ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); + if (!sat_src) + return false; + + sat_src->accept(this); + src_reg src = this->result; + + this->result = src_reg(this, ir->type); + vec4_instruction *inst; + inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src); + inst->saturate = true; + + return true; +} + +void +vec4_visitor::emit_bool_comparison(unsigned int op, + dst_reg dst, src_reg src0, src_reg src1) +{ + /* original gen4 does destination conversion before comparison. */ + if (intel->gen < 5) + dst.type = src0.type; + + vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1); + inst->conditional_mod = brw_conditional_for_comparison(op); + + dst.type = BRW_REGISTER_TYPE_D; + emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1)); +} + +void +vec4_visitor::visit(ir_expression *ir) +{ + unsigned int operand; + src_reg op[Elements(ir->operands)]; + src_reg result_src; + dst_reg result_dst; + vec4_instruction *inst; + + if (try_emit_sat(ir)) + return; + + for (operand = 0; operand < ir->get_num_operands(); operand++) { + this->result.file = BAD_FILE; + ir->operands[operand]->accept(this); + if (this->result.file == BAD_FILE) { + printf("Failed to get tree for expression operand:\n"); + ir->operands[operand]->print(); + exit(1); + } + op[operand] = this->result; + + /* Matrix expression operands should have been broken down to vector + * operations already. + */ + assert(!ir->operands[operand]->type->is_matrix()); + } + + int vector_elements = ir->operands[0]->type->vector_elements; + if (ir->operands[1]) { + vector_elements = MAX2(vector_elements, + ir->operands[1]->type->vector_elements); + } + + this->result.file = BAD_FILE; + + /* Storage for our result. 
Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = src_reg(this, ir->type); + /* convenience for the emit functions below. */ + result_dst = dst_reg(result_src); + /* If nothing special happens, this is the result. */ + this->result = result_src; + /* Limit writes to the channels that will be used by result_src later. + * This does limit this temp's use as a temporary for multi-instruction + * sequences. + */ + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + + switch (ir->operation) { + case ir_unop_logic_not: + /* Note that BRW_OPCODE_NOT is not appropriate here, since it is + * ones complement of the whole register, not just bit 0. + */ + emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1)); + break; + case ir_unop_neg: + op[0].negate = !op[0].negate; + this->result = op[0]; + break; + case ir_unop_abs: + op[0].abs = true; + op[0].negate = false; + this->result = op[0]; + break; + + case ir_unop_sign: + emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f)); + + inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_G; + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f)); + inst->predicate = BRW_PREDICATE_NORMAL; + + inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_L; + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f)); + inst->predicate = BRW_PREDICATE_NORMAL; + + break; + + case ir_unop_rcp: + emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); + break; + + case ir_unop_exp2: + emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); + break; + case ir_unop_log2: + emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); + break; + case ir_unop_exp: + case ir_unop_log: + assert(!"not reached: should be handled by ir_explog_to_explog2"); + break; + case ir_unop_sin: + case ir_unop_sin_reduced: + emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos: + case 
ir_unop_cos_reduced: + emit_math(SHADER_OPCODE_COS, result_dst, op[0]); + break; + + case ir_unop_dFdx: + case ir_unop_dFdy: + assert(!"derivatives not valid in vertex shader"); + break; + + case ir_unop_noise: + assert(!"not reached: should be handled by lower_noise"); + break; + + case ir_binop_add: + emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]); + break; + case ir_binop_sub: + assert(!"not reached: should be handled by ir_sub_to_add_neg"); + break; + + case ir_binop_mul: + emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); + break; + case ir_binop_div: + assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + case ir_binop_mod: + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + break; + + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_nequal: { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = brw_conditional_for_comparison(ir->operation); + emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1)); + break; + } + + case ir_binop_all_equal: + /* "==" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_Z; + + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; + } else { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. 
*/ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); + } + break; + case ir_binop_any_nequal: + /* "!=" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + } else { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); + } + break; + + case ir_unop_any: + emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + break; + + case ir_binop_logic_xor: + emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_or: + emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_and: + emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); + break; + + case ir_binop_dot: + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[0]->type == ir->operands[1]->type); + emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); + break; + + case ir_unop_sqrt: + emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); + break; + case ir_unop_rsq: + emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); + break; + case ir_unop_i2f: + case ir_unop_i2u: + case ir_unop_u2i: + case ir_unop_u2f: + case ir_unop_b2f: + case ir_unop_b2i: + case ir_unop_f2i: + 
emit(BRW_OPCODE_MOV, result_dst, op[0]); + break; + case ir_unop_f2b: + case ir_unop_i2b: { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1)); + break; + } + + case ir_unop_trunc: + emit(BRW_OPCODE_RNDZ, result_dst, op[0]); + break; + case ir_unop_ceil: + op[0].negate = !op[0].negate; + inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); + this->result.negate = true; + break; + case ir_unop_floor: + inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); + break; + case ir_unop_fract: + inst = emit(BRW_OPCODE_FRC, result_dst, op[0]); + break; + case ir_unop_round_even: + emit(BRW_OPCODE_RNDE, result_dst, op[0]); + break; + + case ir_binop_min: + inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_L; + + inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + case ir_binop_max: + inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_G; + + inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + + case ir_binop_pow: + emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]); + break; + + case ir_unop_bit_not: + inst = emit(BRW_OPCODE_NOT, result_dst, op[0]); + break; + case ir_binop_bit_and: + inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); + break; + case ir_binop_bit_xor: + inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); + break; + case ir_binop_bit_or: + inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); + break; + + case ir_binop_lshift: + case ir_binop_rshift: + assert(!"GLSL 1.30 features unsupported"); + break; + + case ir_quadop_vector: + assert(!"not reached: should be handled by lower_quadop_vector"); + 
break; + } +} + + +void +vec4_visitor::visit(ir_swizzle *ir) +{ + src_reg src; + int i = 0; + int swizzle[4]; + + /* Note that this is only swizzles in expressions, not those on the left + * hand side of an assignment, which do write masking. See ir_assignment + * for that. + */ + + ir->val->accept(this); + src = this->result; + assert(src.file != BAD_FILE); + + if (i < ir->type->vector_elements) { + switch (i) { + case 0: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x); + break; + case 1: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y); + break; + case 2: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z); + break; + case 3: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w); + break; + } + } + for (; i < 4; i++) { + /* Replicate the last channel out. */ + swizzle[i] = swizzle[ir->type->vector_elements - 1]; + } + + src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); + + this->result = src; +} + +void +vec4_visitor::visit(ir_dereference_variable *ir) +{ + dst_reg *reg = variable_storage(ir->var); + + if (!reg) { + fail("Failed to find variable storage for %s\n", ir->var->name); + this->result = src_reg(brw_null_reg()); + return; + } + + this->result = src_reg(*reg); +} + +void +vec4_visitor::visit(ir_dereference_array *ir) +{ + ir_constant *constant_index; + src_reg src; + int element_size = type_size(ir->type); + + constant_index = ir->array_index->constant_expression_value(); + + ir->array->accept(this); + src = this->result; + + if (constant_index) { + src.reg_offset += constant_index->value.i[0] * element_size; + } else { +#if 0 /* Variable array index */ + /* Variable index array dereference. It eats the "vec4" of the + * base of the array and an index that offsets the Mesa register + * index. 
+ */ + ir->array_index->accept(this); + + src_reg index_reg; + + if (element_size == 1) { + index_reg = this->result; + } else { + index_reg = src_reg(this, glsl_type::float_type); + + emit(BRW_OPCODE_MUL, dst_reg(index_reg), + this->result, src_reg_for_float(element_size)); + } + + src.reladdr = ralloc(mem_ctx, src_reg); + memcpy(src.reladdr, &index_reg, sizeof(index_reg)); +#endif + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + src.swizzle = swizzle_for_size(ir->type->vector_elements); + else + src.swizzle = BRW_SWIZZLE_NOOP; + + this->result = src; +} + +void +vec4_visitor::visit(ir_dereference_record *ir) +{ + unsigned int i; + const glsl_type *struct_type = ir->record->type; + int offset = 0; + + ir->record->accept(this); + + for (i = 0; i < struct_type->length; i++) { + if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) + break; + offset += type_size(struct_type->fields.structure[i].type); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + this->result.swizzle = swizzle_for_size(ir->type->vector_elements); + else + this->result.swizzle = BRW_SWIZZLE_NOOP; + + this->result.reg_offset += offset; +} + +/** + * We want to be careful in assignment setup to hit the actual storage + * instead of potentially using a temporary like we might with the + * ir_dereference handler. + */ +static dst_reg +get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) +{ + /* The LHS must be a dereference. If the LHS is a variable indexed array + * access of a vector, it must be separated into a series conditional moves + * before reaching this point (see ir_vec_index_to_cond_assign). 
+ */ + assert(ir->as_dereference()); + ir_dereference_array *deref_array = ir->as_dereference_array(); + if (deref_array) { + assert(!deref_array->array->type->is_vector()); + } + + /* Use the rvalue deref handler for the most part. We'll ignore + * swizzles in it and write swizzles using writemask, though. + */ + ir->accept(v); + return dst_reg(v->result); +} + +void +vec4_visitor::emit_block_move(ir_assignment *ir) +{ + ir->rhs->accept(this); + src_reg src = this->result; + + dst_reg dst = get_assignment_lhs(ir->lhs, this); + + /* FINISHME: This should really set to the correct maximal writemask for each + * FINISHME: component written (in the loops below). + */ + dst.writemask = WRITEMASK_XYZW; + + for (int i = 0; i < type_size(ir->lhs->type); i++) { + vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); + if (ir->condition) + inst->predicate = BRW_PREDICATE_NORMAL; + + dst.reg_offset++; + src.reg_offset++; + } +} + +void +vec4_visitor::visit(ir_assignment *ir) +{ + if (!ir->lhs->type->is_scalar() && + !ir->lhs->type->is_vector()) { + emit_block_move(ir); + return; + } + + /* Now we're down to just a scalar/vector with writemasks. */ + int i; + + ir->rhs->accept(this); + src_reg src = this->result; + + dst_reg dst = get_assignment_lhs(ir->lhs, this); + + int swizzles[4]; + int first_enabled_chan = 0; + int src_chan = 0; + + assert(ir->lhs->type->is_vector()); + dst.writemask = ir->write_mask; + + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) { + first_enabled_chan = BRW_GET_SWZ(src.swizzle, i); + break; + } + } + + /* Swizzle a small RHS vector into the channels being written. + * + * glsl ir treats write_mask as dictating how many channels are + * present on the RHS while in our instructions we need to make + * those channels appear in the slots of the vec4 they're written to. 
+ */ + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) + swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++); + else + swizzles[i] = first_enabled_chan; + } + src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); + + if (ir->condition) { + emit_bool_to_cond_code(ir->condition); + } + + for (i = 0; i < type_size(ir->lhs->type); i++) { + vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); + + if (ir->condition) + inst->predicate = BRW_PREDICATE_NORMAL; + + dst.reg_offset++; + src.reg_offset++; + } +} + + +void +vec4_visitor::visit(ir_constant *ir) +{ + if (ir->type->base_type == GLSL_TYPE_STRUCT) { + src_reg temp_base = src_reg(this, ir->type); + dst_reg temp = dst_reg(temp_base); + + foreach_iter(exec_list_iterator, iter, ir->components) { + ir_constant *field_value = (ir_constant *)iter.get(); + int size = type_size(field_value->type); + + assert(size > 0); + + field_value->accept(this); + src_reg src = this->result; + + for (int i = 0; i < (unsigned int)size; i++) { + emit(BRW_OPCODE_MOV, temp, src); + + src.reg_offset++; + temp.reg_offset++; + } + } + this->result = temp_base; + return; + } + + if (ir->type->is_array()) { + src_reg temp_base = src_reg(this, ir->type); + dst_reg temp = dst_reg(temp_base); + int size = type_size(ir->type->fields.array); + + assert(size > 0); + + for (unsigned int i = 0; i < ir->type->length; i++) { + ir->array_elements[i]->accept(this); + src_reg src = this->result; + for (int j = 0; j < size; j++) { + emit(BRW_OPCODE_MOV, temp, src); + + src.reg_offset++; + temp.reg_offset++; + } + } + this->result = temp_base; + return; + } + + if (ir->type->is_matrix()) { + this->result = src_reg(this, ir->type); + dst_reg dst = dst_reg(this->result); + + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + + for (int i = 0; i < ir->type->matrix_columns; i++) { + for (int j = 0; j < ir->type->vector_elements; j++) { + dst.writemask = 1 << j; + emit(BRW_OPCODE_MOV, dst, + src_reg(ir->value.f[i * 
ir->type->vector_elements + j])); + } + dst.reg_offset++; + } + return; + } + + for (int i = 0; i < ir->type->vector_elements; i++) { + this->result = src_reg(this, ir->type); + dst_reg dst = dst_reg(this->result); + + dst.writemask = 1 << i; + + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i])); + break; + case GLSL_TYPE_INT: + emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i])); + break; + case GLSL_TYPE_UINT: + emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i])); + break; + case GLSL_TYPE_BOOL: + emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i])); + break; + default: + assert(!"Non-float/uint/int/bool constant"); + break; + } + } +} + +void +vec4_visitor::visit(ir_call *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_texture *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_return *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_discard *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_if *ir) +{ + this->base_ir = ir->condition; + ir->condition->accept(this); + assert(this->result.file != BAD_FILE); + + /* FINISHME: condcode */ + emit(BRW_OPCODE_IF); + + visit_instructions(&ir->then_instructions); + + if (!ir->else_instructions.is_empty()) { + this->base_ir = ir->condition; + emit(BRW_OPCODE_ELSE); + + visit_instructions(&ir->else_instructions); + } + + this->base_ir = ir->condition; + emit(BRW_OPCODE_ENDIF); +} + +int +vec4_visitor::emit_vue_header_gen4(int header_mrf) +{ + /* Get the position */ + src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]); + + /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */ + dst_reg ndc = dst_reg(this, glsl_type::vec4_type); + + current_annotation = "NDC"; + dst_reg ndc_w = ndc; + ndc_w.writemask = WRITEMASK_W; + src_reg pos_w = pos; + pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); + emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w); + + dst_reg ndc_xyz = ndc; + 
ndc_xyz.writemask = WRITEMASK_XYZ; + + emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w)); + + if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || + c->key.nr_userclip || brw->has_negative_rhw_bug) { + dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); + GLuint i; + + emit(BRW_OPCODE_MOV, header1, 0u); + + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { + assert(!"finishme: psiz"); + src_reg psiz; + + header1.writemask = WRITEMASK_W; + emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11); + emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8); + } + + for (i = 0; i < c->key.nr_userclip; i++) { + vec4_instruction *inst; + + inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()), + pos, src_reg(c->userplane[i])); + inst->conditional_mod = BRW_CONDITIONAL_L; + + emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i); + inst->predicate = BRW_PREDICATE_NORMAL; + } + + /* i965 clipping workaround: + * 1) Test for -ve rhw + * 2) If set, + * set ndc = (0,0,0,0) + * set ucp[6] = 1 + * + * Later, clipping will detect ucp[6] and ensure the primitive is + * clipped against all fixed planes. + */ + if (brw->has_negative_rhw_bug) { +#if 0 + /* FINISHME */ + brw_CMP(p, + vec8(brw_null_reg()), + BRW_CONDITIONAL_L, + brw_swizzle1(ndc, 3), + brw_imm_f(0)); + + brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); + brw_MOV(p, ndc, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +#endif + } + + header1.writemask = WRITEMASK_XYZW; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1)); + } else { + emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++), + BRW_REGISTER_TYPE_UD), 0u); + } + + if (intel->gen == 5) { + /* There are 20 DWs (D0-D19) in VUE header on Ironlake: + * dword 0-3 (m1) of the header is indices, point width, clip flags. 
+       * dword 4-7 (m2) is the ndc position (set above)
+       * dword 8-11 (m3) of the vertex header is the 4D space position
+       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
+       * m6 is a pad so that the vertex element data is aligned
+       * m7 is the first vertex data we fill, which is the vertex position.
+       */
+      current_annotation = "NDC";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
+
+      current_annotation = "gl_Position";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
+
+      /* user clip distance: two registers are skipped; nothing is written
+       * to them by this function.
+       */
+      header_mrf += 2;
+
+      /* Pad so that vertex element data (starts with position) is aligned. */
+      header_mrf++;
+   } else {
+      /* There are 8 dwords in VUE header pre-Ironlake:
+       * dword 0-3 (m1) is indices, point width, clip flags.
+       * dword 4-7 (m2) is ndc position (set above)
+       *
+       * dword 8-11 (m3) is the first vertex data, which we always make the
+       * vertex position.
+       */
+      current_annotation = "NDC";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
+
+      current_annotation = "gl_Position";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
+   }
+
+   return header_mrf;   /* first message register after the header */
+}
+
+int
+vec4_visitor::emit_vue_header_gen6(int header_mrf)
+{
+   struct brw_reg reg;
+
+   /* Emits the VUE header used when intel->gen >= 6, starting at message
+    * register header_mrf, and returns the message register that follows
+    * the header.
+    *
+    * There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
+    * dword 0-3 (m2) of the header is indices, point width, clip flags.
+    * dword 4-7 (m3) is the 4D space position
+    * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
+    * enabled.
+    *
+    * m4 or 6 is the first vertex element data we fill, which is
+    * the vertex position.
+ */ + + current_annotation = "indices, point width, clip flags"; + reg = brw_message_reg(header_mrf++); + emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { + emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W), + src_reg(output_reg[VERT_RESULT_PSIZ])); + } + + current_annotation = "gl_Position"; + emit(BRW_OPCODE_MOV, + brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS])); + + current_annotation = "user clip distances"; + if (c->key.nr_userclip) { + for (int i = 0; i < c->key.nr_userclip; i++) { + struct brw_reg m; + if (i < 4) + m = brw_message_reg(header_mrf); + else + m = brw_message_reg(header_mrf + 1); + + emit(BRW_OPCODE_DP4, + dst_reg(brw_writemask(m, 1 << (i & 7))), + src_reg(c->userplane[i])); + } + header_mrf += 2; + } + + current_annotation = NULL; + + return header_mrf; +} + +static int +align_interleaved_urb_mlen(struct brw_context *brw, int mlen) +{ + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 6) { + /* URB data written (does not include the message header reg) must + * be a multiple of 256 bits, or 2 VS registers. See vol5c.5, + * section 5.4.3.2.2: URB_INTERLEAVED. + * + * URB entries are allocated on a multiple of 1024 bits, so an + * extra 128 bits written here to make the end align to 256 is + * no problem. + */ + if ((mlen % 2) != 1) + mlen++; + } + + return mlen; +} + +/** + * Generates the VUE payload plus the 1 or 2 URB write instructions to + * complete the VS thread. + * + * The VUE layout is documented in Volume 2a. + */ +void +vec4_visitor::emit_urb_writes() +{ + int base_mrf = 1; + int mrf = base_mrf; + int urb_entry_size; + + /* FINISHME: edgeflag */ + + /* First mrf is the g0-based message header containing URB handles and such, + * which is implied in VS_OPCODE_URB_WRITE. 
+ */ + mrf++; + + if (intel->gen >= 6) { + mrf = emit_vue_header_gen6(mrf); + } else { + mrf = emit_vue_header_gen4(mrf); + } + + int attr; + for (attr = 0; attr < VERT_RESULT_MAX; attr++) { + if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) + continue; + + /* This is loaded into the VUE header, and thus doesn't occupy + * an attribute slot. + */ + if (attr == VERT_RESULT_PSIZ) + continue; + + emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); + + /* If this is MRF 15, we can't fit anything more into this URB + * WRITE. Note that base_mrf of 1 means that MRF 15 is an + * even-numbered amount of URB write data, which will meet + * gen6's requirements for length alignment. + */ + if (mrf == 15) + break; + } + + vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); + inst->base_mrf = base_mrf; + inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); + inst->eot = true; + + urb_entry_size = mrf - base_mrf; + + for (; attr < VERT_RESULT_MAX; attr++) { + if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) + continue; + fail("Second URB write not supported.\n"); + break; + } + + if (intel->gen == 6) + c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8; + else + c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4; +} + +vec4_visitor::vec4_visitor(struct brw_vs_compile *c, + struct gl_shader_program *prog, + struct brw_shader *shader) +{ + this->c = c; + this->p = &c->func; + this->brw = p->brw; + this->intel = &brw->intel; + this->ctx = &intel->ctx; + this->prog = prog; + this->shader = shader; + + this->mem_ctx = ralloc_context(NULL); + this->failed = false; + + this->base_ir = NULL; + this->current_annotation = NULL; + + this->c = c; + this->prog_data = &c->prog_data; + + this->variable_ht = hash_table_ctor(0, + hash_table_pointer_hash, + hash_table_pointer_compare); + + this->virtual_grf_sizes = NULL; + this->virtual_grf_count = 0; + this->virtual_grf_array_size = 0; +} + +vec4_visitor::~vec4_visitor() +{ + 
hash_table_dtor(this->variable_ht); +} + + +void +vec4_visitor::fail(const char *format, ...) +{ + va_list va; + char *msg; + + if (failed) + return; + + failed = true; + + va_start(va, format); + msg = ralloc_vasprintf(mem_ctx, format, va); + va_end(va); + msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg); + + this->fail_msg = msg; + + if (INTEL_DEBUG & DEBUG_VS) { + fprintf(stderr, "%s", msg); + } +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index a9ad531..bd0677d 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -30,6 +30,7 @@ */ +#include "main/compiler.h" #include "brw_context.h" #include "brw_vs.h" #include "brw_util.h" @@ -50,6 +51,7 @@ static void do_vs_prog( struct brw_context *brw, void *mem_ctx; int aux_size; int i; + static int new_vs = -1; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); @@ -85,7 +87,15 @@ static void do_vs_prog( struct brw_context *brw, /* Emit GEN4 code. */ - brw_vs_emit(&c); + if (new_vs == -1) + new_vs = getenv("INTEL_NEW_VS") != NULL; + + if (new_vs) { + if (!brw_vs_emit(&c)) + brw_old_vs_emit(&c); + } else { + brw_old_vs_emit(&c); + } /* get the program */ diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 432994a..9f9fed3 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -92,6 +92,7 @@ struct brw_vs_compile { GLboolean needs_stack; }; -void brw_vs_emit( struct brw_vs_compile *c ); +bool brw_vs_emit(struct brw_vs_compile *c); +void brw_old_vs_emit(struct brw_vs_compile *c); #endif diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index dbabb44..a06a2bb 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1903,7 +1903,7 @@ brw_vs_rescale_gl_fixed(struct brw_vs_compile *c) /* Emit the vertex program instructions here. 
*/ -void brw_vs_emit(struct brw_vs_compile *c ) +void brw_old_vs_emit(struct brw_vs_compile *c ) { #define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32