i915: Keith Whitwell's swizzling TEX patch. fix #8283
author: Xiang, Haihao <haihao.xiang@intel.com>
Mon, 7 Jan 2008 06:08:36 +0000 (14:08 +0800)
committer: Xiang, Haihao <haihao.xiang@intel.com>
Mon, 7 Jan 2008 06:08:36 +0000 (14:08 +0800)
src/mesa/drivers/dri/i915/i915_context.h
src/mesa/drivers/dri/i915/i915_fragprog.c
src/mesa/drivers/dri/i915/i915_program.c
src/mesa/drivers/dri/i915/i915_program.h

index 1070de1..c6958dd 100644 (file)
@@ -29,6 +29,7 @@
 #define I915CONTEXT_INC
 
 #include "intel_context.h"
+#include "i915_reg.h"
 
 #define I915_FALLBACK_TEXTURE           0x1000
 #define I915_FALLBACK_COLORMASK                 0x2000
 
 #define I915_PROGRAM_SIZE      192
 
+#define I915_MAX_INSN          (I915_MAX_TEX_INSN+I915_MAX_ALU_INSN)
 
 /* Hardware version of a parsed fragment program.  "Derived" from the
  * mesa fragment_program struct.
@@ -161,6 +163,10 @@ struct i915_fragment_program
                                  */
 
 
+   /* Track which R registers are "live" for each instruction.
+    * A register is live between the time it's written to and the last time
+    * it's read. */
+   GLuint usedRegs[I915_MAX_INSN];
 
    /* Helpers for i915_fragprog.c:
     */
index 4c3f223..bafc8f0 100644 (file)
@@ -231,7 +231,7 @@ do {                                                                \
    GLuint coord = src_vector( p, &inst->SrcReg[0], program);   \
    /* Texel lookup */                                          \
                                                                \
-   i915_emit_texld( p,                                         \
+   i915_emit_texld( p, get_live_regs(p, inst),                                         \
               get_result_vector( p, inst ),                    \
               get_result_flags( inst ),                        \
               sampler,                                         \
@@ -254,6 +254,43 @@ do {                                                                       \
 #define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 )
 #define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 )
 
+/**
+ * Fill p->usedRegs[] with one liveness bitmask per instruction of the
+ * current fragment program.  Bit N of usedRegs[i] is set when
+ * PROGRAM_TEMPORARY register N holds a value that instruction i, or some
+ * instruction after it, still reads.
+ *
+ * The program is walked from the last instruction to the first.  A read
+ * makes the register live for this and all preceding instructions.  A
+ * write ends that range only once every component that is still needed
+ * has been overwritten: a partial write (e.g. to .x only) must not free a
+ * register whose other components are read further down.
+ *
+ * The upper 16 bits of every mask are always set, so only registers 0..15
+ * can ever be reported as free.
+ *
+ * TODO: consider moving this into core
+ */
+static void calc_live_regs( struct i915_fragment_program *p )
+{
+    /* NUM_TRACKED_REGS matches the 16 low bits left clear in the initial
+     * mask below; ALL_COMPONENTS is the x|y|z|w component mask. */
+    enum { NUM_TRACKED_REGS = 16, ALL_COMPONENTS = 0xf };
+
+    const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current;
+    GLuint regsUsed = 0xffff0000;
+    /* Per register: components read later on and not yet overwritten. */
+    GLuint liveComps[NUM_TRACKED_REGS] = { 0 };
+    GLint i;
+
+    for (i = program->Base.NumInstructions - 1; i >= 0; i--) {
+        const struct prog_instruction *inst = &program->Base.Instructions[i];
+        const int opArgs = _mesa_num_inst_src_regs(inst->Opcode);
+        int a;
+
+        /* Register is written to: drop the components this instruction
+         * defines, and unmark the register as live for this and preceding
+         * ops only when none of its components is still needed. */
+        if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+            const GLint dst = inst->DstReg.Index;
+
+            /* Indices outside the tracked range have no bit to clear
+             * (and shifting by them would be undefined). */
+            if (dst >= 0 && dst < NUM_TRACKED_REGS) {
+                liveComps[dst] &= ~(GLuint) inst->DstReg.WriteMask;
+                if (liveComps[dst] == 0)
+                    regsUsed &= ~(1u << dst);
+            }
+        }
+
+        /* Sources are handled after the destination so that an instruction
+         * reading the register it writes keeps that register live. */
+        for (a = 0; a < opArgs; a++) {
+            /* Register is read from: mark as live for this and preceding
+             * ops.  All four components are conservatively assumed to be
+             * read, whatever the source swizzle selects. */
+            if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) {
+                const GLint src = inst->SrcReg[a].Index;
+
+                if (src >= 0 && src < NUM_TRACKED_REGS) {
+                    liveComps[src] = ALL_COMPONENTS;
+                    regsUsed |= 1u << src;
+                }
+            }
+        }
+
+        p->usedRegs[i] = regsUsed;
+    }
+}
+
+/* Return the liveness mask stored in p->usedRegs[] for 'inst'.  The slot is
+ * the position of 'inst' within the instruction array of the current
+ * fragment program, so 'inst' must point into that array. */
+static GLuint get_live_regs( struct i915_fragment_program *p, 
+                             const struct prog_instruction *inst )
+{
+    const struct prog_instruction *first =
+        p->ctx->FragmentProgram._Current->Base.Instructions;
+
+    return p->usedRegs[(GLuint) (inst - first)];
+}
 
 /* Possible concerns:
  *
@@ -289,6 +326,15 @@ upload_program(struct i915_fragment_program *p)
       return;
    }
 
+   if (program->Base.NumInstructions > I915_MAX_INSN) {
+       i915_program_error( p, "Exceeded max instructions" );
+       return;
+    }
+
+   /* Not always needed:
+    */
+   calc_live_regs(p);
+
    while (1) {
       GLuint src0, src1, src2, flags;
       GLuint tmp = 0;
@@ -423,7 +469,8 @@ upload_program(struct i915_fragment_program *p)
          src0 = src_vector(p, &inst->SrcReg[0], program);
          tmp = i915_get_utemp(p);
 
-         i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL,   /* use a dummy dest reg */
+         i915_emit_texld(p, get_live_regs(p, inst),
+                         tmp, A0_DEST_CHANNEL_ALL,   /* use a dummy dest reg */
                          0, src0, T0_TEXKILL);
          break;
 
index c6c6434..f79d00d 100644 (file)
@@ -194,27 +194,43 @@ i915_emit_arith(struct i915_fragment_program * p,
    return dest;
 }
 
+/* Pick the lowest-numbered R register whose bit is clear in 'live_regs'.
+ * Returns the register as a UREG, or flags a program error and returns
+ * UREG_BAD when every bit of 'live_regs' is set. */
+static GLuint get_free_rreg (struct i915_fragment_program *p, 
+                             GLuint live_regs)
+{
+    /* ffs() numbers bits from 1 and yields 0 when no bit is set, i.e. when
+     * no register is free. */
+    const int firstClear = ffs(~live_regs);
+
+    if (firstClear != 0)
+        return UREG(REG_TYPE_R, firstClear - 1);
+
+    i915_program_error(p, "Can't find free R reg");
+    return UREG_BAD;
+}
+
 GLuint i915_emit_texld( struct i915_fragment_program *p,
+                       GLuint live_regs,               
                        GLuint dest,
                        GLuint destmask,
                        GLuint sampler,
                        GLuint coord,
                        GLuint op )
 {
-   if (coord != UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord))) {
-      /* No real way to work around this in the general case - need to
-       * allocate and declare a new temporary register (a utemp won't
-       * do).  Will fallback for now.
-       */
-      i915_program_error(p, "Can't (yet) swizzle TEX arguments");
-      return 0;
-   }
-
-   /* Don't worry about saturate as we only support  
+    if (coord != UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord))) {
+        /* With the help of the "needed registers" table created earlier, pick
+         * a register we can MOV the swizzled TC to (since TEX doesn't support
+         * swizzled sources) */
+        GLuint swizCoord = get_free_rreg(p, live_regs);
+        if (swizCoord == UREG_BAD) 
+            return 0;
+
+        i915_emit_arith( p, A0_MOV, swizCoord, A0_DEST_CHANNEL_ALL, 0, coord, 0, 0 );
+        coord = swizCoord;
+    }
+
+   /* Don't worry about saturate as we only support texture formats
+    * that are always in the 0..1 range.
     */
    if (destmask != A0_DEST_CHANNEL_ALL) {
       GLuint tmp = i915_get_utemp(p);
-      i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op );
+      i915_emit_texld( p, 0, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op );
       i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 );
       return dest;
    }
index 3c12b34..14a3f08 100644 (file)
@@ -112,6 +112,7 @@ extern void i915_release_utemps(struct i915_fragment_program *p);
 
 
 extern GLuint i915_emit_texld(struct i915_fragment_program *p,
+                              GLuint live_regs,
                               GLuint dest,
                               GLuint destmask,
                               GLuint sampler, GLuint coord, GLuint op);