From befcce264c8bf8fdac233e6a01cadc595a1d11d3 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Sat, 14 Jan 2012 08:08:33 -0500 Subject: [PATCH] r300/compiler: Add support for inline literals On R500 chips, shader instructions can take 7-bit (3-bit mantissa, 4-bit exponent) floating point values as inputs in place of registers. --- src/gallium/drivers/r300/Makefile.sources | 1 + src/gallium/drivers/r300/compiler/r3xx_fragprog.c | 1 + src/gallium/drivers/r300/compiler/r500_fragprog.c | 2 + .../drivers/r300/compiler/r500_fragprog_emit.c | 2 + .../drivers/r300/compiler/radeon_compiler.c | 16 ++- .../drivers/r300/compiler/radeon_compiler.h | 1 + .../drivers/r300/compiler/radeon_dataflow.h | 1 + .../drivers/r300/compiler/radeon_inline_literals.c | 140 +++++++++++++++++++++ .../drivers/r300/compiler/radeon_pair_translate.c | 10 +- .../r300/compiler/radeon_program_constants.h | 7 +- .../drivers/r300/compiler/radeon_program_pair.h | 2 +- .../drivers/r300/compiler/radeon_program_print.c | 18 +++ 12 files changed, 192 insertions(+), 9 deletions(-) create mode 100644 src/gallium/drivers/r300/compiler/radeon_inline_literals.c diff --git a/src/gallium/drivers/r300/Makefile.sources b/src/gallium/drivers/r300/Makefile.sources index 90105d6..e27b14e 100644 --- a/src/gallium/drivers/r300/Makefile.sources +++ b/src/gallium/drivers/r300/Makefile.sources @@ -28,6 +28,7 @@ C_SOURCES := \ compiler/radeon_compiler_util.c \ compiler/radeon_emulate_branches.c \ compiler/radeon_emulate_loops.c \ + compiler/radeon_inline_literals.c \ compiler/radeon_program.c \ compiler/radeon_program_print.c \ compiler/radeon_opcodes.c \ diff --git a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c index 8fea4db..8ef2d24 100644 --- a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c +++ b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c @@ -125,6 +125,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {"emulate loops", 1, 
!is_r500, rc_emulate_loops, NULL}, {"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL}, {"dataflow optimize", 1, opt, rc_optimize, NULL}, + {"inline literals", 1, is_r500 && opt, rc_inline_literals, NULL}, {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, {"pair translate", 1, 1, rc_pair_translate, NULL}, diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.c b/src/gallium/drivers/r300/compiler/r500_fragprog.c index cf99f5e..499aa92 100644 --- a/src/gallium/drivers/r300/compiler/r500_fragprog.c +++ b/src/gallium/drivers/r300/compiler/r500_fragprog.c @@ -218,6 +218,8 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) return 1; return 0; + } else if (reg.File == RC_FILE_INLINE) { + return 1; } else { /* ALU instructions support almost everything */ relevant = 0; diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c index 87b96d1..f6b6c0f 100644 --- a/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c +++ b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c @@ -210,6 +210,8 @@ static unsigned int use_source(struct r500_fragment_program_code* code, struct r } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { use_temporary(code, src.Index); return src.Index; + } else if (src.File == RC_FILE_INLINE) { + return src.Index | (1 << 7); } return 0; diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.c b/src/gallium/drivers/r300/compiler/radeon_compiler.c index 986e3b7..4d4eb64 100644 --- a/src/gallium/drivers/r300/compiler/radeon_compiler.c +++ b/src/gallium/drivers/r300/compiler/radeon_compiler.c @@ -357,21 +357,22 @@ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) static void reg_count_callback(void * userdata, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int 
mask) { - int *max_reg = userdata; + struct rc_program_stats *s = userdata; if (file == RC_FILE_TEMPORARY) - (int)index > *max_reg ? *max_reg = index : 0; + (int)index > s->num_temp_regs ? s->num_temp_regs = index : 0; + if (file == RC_FILE_INLINE) + s->num_inline_literals++; } void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) { - int max_reg = -1; struct rc_instruction * tmp; memset(s, 0, sizeof(*s)); for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; tmp = tmp->Next){ const struct rc_opcode_info * info; - rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg); + rc_for_all_reads_mask(tmp, reg_count_callback, s); if (tmp->Type == RC_INSTRUCTION_NORMAL) { info = rc_get_opcode_info(tmp->U.I.Opcode); if (info->Opcode == RC_OPCODE_BEGIN_TEX) @@ -405,7 +406,9 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) s->num_tex_insts++; s->num_insts++; } - s->num_temp_regs = max_reg + 1; + /* Increment here because the reg_count_callback stores the max + * temporary reg index in s->num_temp_regs. 
*/ + s->num_temp_regs++; } static void print_stats(struct radeon_compiler * c) @@ -437,10 +440,11 @@ static void print_stats(struct radeon_compiler * c) "~%4u Presub Operations\n" "~%4u OMOD Operations\n" "~%4u Temporary Registers\n" + "~%4u Inline Literals\n" "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", s.num_insts, s.num_rgb_insts, s.num_alpha_insts, s.num_fc_insts, s.num_tex_insts, s.num_presub_ops, - s.num_omod_ops, s.num_temp_regs); + s.num_omod_ops, s.num_temp_regs, s.num_inline_literals); break; default: assert(0); diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.h b/src/gallium/drivers/r300/compiler/radeon_compiler.h index ac9691c..e7ccbb7 100644 --- a/src/gallium/drivers/r300/compiler/radeon_compiler.h +++ b/src/gallium/drivers/r300/compiler/radeon_compiler.h @@ -161,6 +161,7 @@ struct rc_program_stats { unsigned num_presub_ops; unsigned num_temp_regs; unsigned num_omod_ops; + unsigned num_inline_literals; }; void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s); diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.h b/src/gallium/drivers/r300/compiler/radeon_dataflow.h index d8a6272..bb8d482 100644 --- a/src/gallium/drivers/r300/compiler/radeon_dataflow.h +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.h @@ -130,5 +130,6 @@ void rc_dataflow_swizzles(struct radeon_compiler * c, void *user); /*@}*/ void rc_optimize(struct radeon_compiler * c, void *user); +void rc_inline_literals(struct radeon_compiler *c, void *user); #endif /* RADEON_DATAFLOW_H */ diff --git a/src/gallium/drivers/r300/compiler/radeon_inline_literals.c b/src/gallium/drivers/r300/compiler/radeon_inline_literals.c new file mode 100644 index 0000000..568a3d6 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_inline_literals.c @@ -0,0 +1,140 @@ + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_program.h" +#include "radeon_program_constants.h" +#include <stdio.h> + +#define VERBOSE 0 + 
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +/* IEEE-754: + * 22:0 mantissa + * 30:23 exponent + * 31 sign + * + * R300: + * 2:0 mantissa + * 6:3 exponent (bias 7) + */ +static int ieee_754_to_r300_float(float f, unsigned char *r300_float_out) +{ + unsigned float_bits = *((unsigned *)&f); + /* XXX: Handle big-endian */ + unsigned mantissa = float_bits & 0x007fffff; + unsigned biased_exponent = (float_bits & 0x7f800000) >> 23; + unsigned negate = !!(float_bits & 0x80000000); + int exponent = biased_exponent - 127; + unsigned mantissa_mask = 0xff8fffff; + unsigned r300_exponent, r300_mantissa; + + DBG("Converting %f (0x%x) to 7-bit:\n", f, float_bits); + DBG("Raw exponent = %d\n", exponent); + + if (exponent < -7 || exponent > 8) { + DBG("Failed exponent out of range\n\n"); + return 0; + } + + if (mantissa & mantissa_mask) { + DBG("Failed mantisa has too many bits:\n" + "manitssa=0x%x mantissa_mask=0x%x, and=0x%x\n\n", + mantissa, mantissa_mask, + mantissa & mantissa_mask); + return 0; + } + + r300_exponent = exponent + 7; + r300_mantissa = (mantissa & ~mantissa_mask) >> 20; + *r300_float_out = r300_mantissa | (r300_exponent << 3); + + DBG("Success! r300_float = 0x%x\n\n", *r300_float_out); + + if (negate) + return -1; + else + return 1; +} + +void rc_inline_literals(struct radeon_compiler *c, void *user) +{ + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + + unsigned src_idx; + struct rc_constant * constant; + float float_value; + unsigned char r300_float; + int ret; + + /* XXX: Handle presub */ + + /* We aren't using rc_for_all_reads_src here, because presub + * sources need to be handled differently. 
*/ + for (src_idx = 0; src_idx < info->NumSrcRegs; src_idx++) { + unsigned new_swizzle; + unsigned use_literal = 0; + unsigned negate_mask = 0; + unsigned swz, chan; + struct rc_src_register * src_reg = + &inst->U.I.SrcReg[src_idx]; + swz = RC_SWIZZLE_UNUSED; + if (src_reg->File != RC_FILE_CONSTANT) { + continue; + } + constant = + &c->Program.Constants.Constants[src_reg->Index]; + if (constant->Type != RC_CONSTANT_IMMEDIATE) { + continue; + } + new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); + for (chan = 0; chan < 4; chan++) { + unsigned char r300_float_tmp; + swz = GET_SWZ(src_reg->Swizzle, chan); + if (swz == RC_SWIZZLE_UNUSED) { + continue; + } + float_value = constant->u.Immediate[swz]; + ret = ieee_754_to_r300_float(float_value, + &r300_float_tmp); + if (!ret || (use_literal && + r300_float != r300_float_tmp)) { + use_literal = 0; + break; + } + + if (ret == -1 && src_reg->Abs) { + use_literal = 0; + break; + } + + if (!use_literal) { + r300_float = r300_float_tmp; + use_literal = 1; + } + + /* Use RC_SWIZZLE_W for the inline constant, so + * it will become one of the alpha sources. 
*/ + SET_SWZ(new_swizzle, chan, RC_SWIZZLE_W); + if (ret == -1) { + negate_mask |= (1 << chan); + } + } + + if (!use_literal) { + continue; + } + src_reg->File = RC_FILE_INLINE; + src_reg->Index = r300_float; + src_reg->Swizzle = new_swizzle; + src_reg->Negate = src_reg->Negate ^ negate_mask; + } + } +} diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_translate.c b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c index 7d9c8d1..c6050bd 100644 --- a/src/gallium/drivers/r300/compiler/radeon_pair_translate.c +++ b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c @@ -268,7 +268,15 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, pair->Alpha.Arg[i].Source = source; pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1); pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W); + + if (istranscendent) { + pair->Alpha.Arg[i].Negate = + !!(inst->SrcReg[i].Negate & + inst->DstReg.WriteMask); + } else { + pair->Alpha.Arg[i].Negate = + !!(inst->SrcReg[i].Negate & RC_MASK_W); + } } } diff --git a/src/gallium/drivers/r300/compiler/radeon_program_constants.h b/src/gallium/drivers/r300/compiler/radeon_program_constants.h index 4f59c47..c07c492 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_constants.h +++ b/src/gallium/drivers/r300/compiler/radeon_program_constants.h @@ -85,7 +85,12 @@ typedef enum { * Indicates this register should use the result of the presubtract * operation. */ - RC_FILE_PRESUB + RC_FILE_PRESUB, + + /** + * Indicates that the source index has been encoded as a 7-bit float. 
+ */ + RC_FILE_INLINE } rc_register_file; enum { diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.h b/src/gallium/drivers/r300/compiler/radeon_program_pair.h index b6eb0eb..085ff99 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_pair.h +++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.h @@ -57,7 +57,7 @@ struct radeon_compiler; struct rc_pair_instruction_source { unsigned int Used:1; - unsigned int File:3; + unsigned int File:4; unsigned int Index:RC_REGISTER_INDEX_BITS; }; diff --git a/src/gallium/drivers/r300/compiler/radeon_program_print.c b/src/gallium/drivers/r300/compiler/radeon_program_print.c index dc40d7f..e3d2104 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_print.c +++ b/src/gallium/drivers/r300/compiler/radeon_program_print.c @@ -109,6 +109,22 @@ static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func fun } } +static void rc_print_inline_float(FILE * f, int index) +{ + int r300_exponent = (index >> 3) & 0xf; + unsigned r300_mantissa = index & 0x7; + unsigned float_exponent; + unsigned real_float; + float * print_float = (float*) &real_float; + + r300_exponent -= 7; + float_exponent = r300_exponent + 127; + real_float = (r300_mantissa << 20) | (float_exponent << 23); + + fprintf(f, "%f (0x%x)", *print_float, index); + +} + static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr) { if (file == RC_FILE_NONE) { @@ -118,6 +134,8 @@ static void rc_print_register(FILE * f, rc_register_file file, int index, unsign case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break; default: fprintf(f, "special[%i]", index); break; } + } else if (file == RC_FILE_INLINE) { + rc_print_inline_float(f, index); } else { const char * filename; switch(file) { -- 2.7.4