Initial check-in of i915 fragment program translation (from tgsi).
author Brian <brian.paul@tungstengraphics.com>
Tue, 21 Aug 2007 22:24:38 +0000 (16:24 -0600)
committer Brian <brian.paul@tungstengraphics.com>
Tue, 21 Aug 2007 22:24:38 +0000 (16:24 -0600)
src/mesa/pipe/i915simple/i915_fpc.c [new file with mode: 0644]
src/mesa/pipe/i915simple/i915_fpc.h [new file with mode: 0644]
src/mesa/pipe/i915simple/i915_fpc_debug.c [new file with mode: 0644]
src/mesa/pipe/i915simple/i915_fpc_emit.c [new file with mode: 0644]
src/mesa/pipe/i915simple/i915_fpc_translate.c [new file with mode: 0644]

diff --git a/src/mesa/pipe/i915simple/i915_fpc.c b/src/mesa/pipe/i915simple/i915_fpc.c
new file mode 100644 (file)
index 0000000..fd0bbbc
--- /dev/null
@@ -0,0 +1,183 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#if 0
+#include <strings.h>
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+#endif
+
+#include "i915_fpc.h"
+
+
+
+/**
+ * Report a fragment program compilation failure.
+ *
+ * Marks the fragment program being compiled as failed (p->fp->error) and
+ * writes msg to stderr behind an "i915_program_error: " prefix.  No newline
+ * is appended to msg.
+ */
+void
+i915_program_error(struct i915_fp_compile *p, const char *msg)
+{
+   struct i915_fragment_program *failed = p->fp;
+
+   failed->error = 1;
+   fprintf(stderr, "i915_program_error: %s", msg);
+}
+
+
+/**
+ * Allocate and initialise a compilation context for fragment program fp.
+ *
+ * The context starts with empty program/declaration buffers, the first
+ * declaration dword already set to _3DSTATE_PIXEL_SHADER_PROGRAM, and the
+ * register allocation masks at their initial values.
+ *
+ * \return the new context, or NULL (with fp->error set) if the allocation
+ *         failed.  The context is released by i915_fini_compile().
+ */
+static struct i915_fp_compile *
+i915_init_compile(struct i915_context *i915, struct i915_fragment_program *fp)
+{
+   struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
+
+   if (!p) {
+      fprintf(stderr, "i915_init_compile: out of memory\n");
+      fp->error = 1;
+      return NULL;
+   }
+
+   p->fp = fp;
+
+   /* find_wpos_space() reads p->shader->inputs_read, so the shader state
+    * must be hooked up before the context is handed back.
+    * NOTE(review): assumes i915->fs is a struct pipe_shader_state (it is
+    * accessed as i915->fs.constants / i915->fs.tokens) -- confirm.
+    */
+   p->shader = &i915->fs;
+#if 0
+   p->env_param = NULL; /*i915->intel.ctx.FragmentProgram.Parameters;*/
+#endif
+   p->constants = i915->fs.constants;
+   p->nr_tex_indirect = 1;      /* correct? */
+   p->nr_tex_insn = 0;
+   p->nr_alu_insn = 0;
+   p->nr_decl_insn = 0;
+
+   memset(p->constant_flags, 0, sizeof(p->constant_flags));
+
+   /* Both cursors start at the beginning of their buffers. */
+   p->csr = p->program;
+   p->decl = p->declarations;
+   p->decl_s = 0;
+   p->decl_t = 0;
+   /* A set bit means "in use": see i915_get_temp() / i915_get_utemp(). */
+   p->temp_flag = 0xffff000;
+   p->utemp_flag = ~0x7;
+
+#if 0
+   p->fp->translated = 0;
+   p->fp->error = 0;
+   p->fp->nr_constants = 0;
+#endif
+   p->fp->wpos_tex = -1;
+   p->fp->nr_params = 0;
+
+   /* Packet header; the length field is OR'ed in by i915_fini_compile(). */
+   *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
+
+   return p;
+}
+
+/* Copy compile results to the fragment program struct and destroy the
+ * compilation context.
+ *
+ * Checks the instruction-count limits and the total size first.  On any
+ * error (including one flagged earlier during translation) the native
+ * instruction counts are zeroed and no program is copied.  The context p
+ * is freed on every path and must not be used afterwards.
+ */
+static void
+i915_fini_compile(struct i915_fp_compile *p)
+{
+   uint program_size = p->csr - p->program;
+   uint decl_size = p->decl - p->declarations;
+
+   if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
+      i915_program_error(p, "Exceeded max nr indirect texture lookups");
+
+   if (p->nr_tex_insn > I915_MAX_TEX_INSN)
+      i915_program_error(p, "Exceeded max TEX instructions");
+
+   if (p->nr_alu_insn > I915_MAX_ALU_INSN)
+      i915_program_error(p, "Exceeded max ALU instructions");
+
+   if (p->nr_decl_insn > I915_MAX_DECL_INSN)
+      i915_program_error(p, "Exceeded max DECL instructions");
+
+   /* Declarations and instructions are concatenated into
+    * fp->program[I915_PROGRAM_SIZE]; refuse to overflow it.
+    */
+   if (program_size + decl_size > I915_PROGRAM_SIZE)
+      i915_program_error(p, "Exceeded max program size");
+
+   if (p->fp->error) {
+      p->fp->NumNativeInstructions = 0;
+      p->fp->NumNativeAluInstructions = 0;
+      p->fp->NumNativeTexInstructions = 0;
+      p->fp->NumNativeTexIndirections = 0;
+   }
+   else {
+      p->fp->NumNativeInstructions = (p->nr_alu_insn +
+                                      p->nr_tex_insn +
+                                      p->nr_decl_insn);
+      p->fp->NumNativeAluInstructions = p->nr_alu_insn;
+      p->fp->NumNativeTexInstructions = p->nr_tex_insn;
+      p->fp->NumNativeTexIndirections = p->nr_tex_indirect;
+
+      /* Fill in the length field of the packet header written by
+       * i915_init_compile(): total dwords minus two.
+       */
+      p->declarations[0] |= program_size + decl_size - 2;
+
+      /* Copy compilation results to fragment program struct:
+       * declarations first, instructions right behind them.
+       */
+      memcpy(p->fp->program,
+             p->declarations,
+             decl_size * sizeof(uint));
+
+      memcpy(p->fp->program + decl_size,
+             p->program,
+             program_size * sizeof(uint));
+
+      p->fp->program_size = program_size + decl_size;
+   }
+
+   /* Release the compilation struct (previously leaked on the error path):
+    */
+   free(p);
+}
+
+
+/**
+ * Find an unused texture coordinate slot to use for fragment WPOS.
+ * Update p->fp->wpos_tex with the result (-1 if the shader does not read
+ * WPOS, or if no unused texcoord slot is found).
+ *
+ * A program error is reported when WPOS is read but all I915_TEX_UNITS
+ * texcoord inputs are already in use, or when no shader state is attached
+ * to the compilation context.
+ */
+static void
+find_wpos_space(struct i915_fp_compile *p)
+{
+   uint inputs;
+   uint i;
+
+   p->fp->wpos_tex = -1;
+
+   /* Without shader state there is no inputs_read mask to inspect; fail
+    * the compile instead of dereferencing a NULL pointer.
+    */
+   if (!p->shader) {
+      i915_program_error(p, "No shader state to scan for a free texcoord");
+      return;
+   }
+
+   inputs = p->shader->inputs_read;
+
+   if (!(inputs & FRAG_BIT_WPOS))
+      return;
+
+   /* FRAG_BIT_TEX0 << i is the input bit of texcoord i. */
+   for (i = 0; i < I915_TEX_UNITS; i++) {
+      if ((inputs & (FRAG_BIT_TEX0 << i)) == 0) {
+         p->fp->wpos_tex = i;
+         return;
+      }
+   }
+
+   i915_program_error(p, "No free texcoord for wpos value");
+}
+
+
+
+void i915_compile_fragment_program( struct i915_context *i915,
+                                   struct i915_fragment_program *fp )
+{
+   struct i915_fp_compile *p = i915_init_compile(i915, fp);
+   struct tgsi_token *tokens = i915->fs.tokens;
+
+   find_wpos_space(p);
+
+   i915_translate_program(p, tokens);
+   i915_fixup_depth_write(p);
+
+   i915_fini_compile(p);
+#if 0
+   fp->translated = 1;
+#endif
+}
diff --git a/src/mesa/pipe/i915simple/i915_fpc.h b/src/mesa/pipe/i915simple/i915_fpc.h
new file mode 100644 (file)
index 0000000..0a8bffc
--- /dev/null
@@ -0,0 +1,339 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#ifndef I915_FPC_H
+#define I915_FPC_H
+
+#include "pipe/p_util.h"
+
+#include "i915_context.h"
+#include "i915_reg.h"
+
+
+
+/* Capacity, in uints, of the program/declaration arrays declared below. */
+#define I915_PROGRAM_SIZE 192
+/* Number of entries in the constant_flags[] and param[] arrays below. */
+#define I915_MAX_CONSTANT  32
+
+/* Number of attribute slots reserved from FRAG_ATTRIB_VAR0 onwards. */
+#define MAX_VARYING 8
+
+/**
+ * Indices of the fragment program input attributes.  The FRAG_BIT_x
+ * macros turn each index into a single-bit flag.
+ */
+enum
+{
+   FRAG_ATTRIB_WPOS = 0,
+   FRAG_ATTRIB_COL0 = 1,
+   FRAG_ATTRIB_COL1 = 2,
+   FRAG_ATTRIB_FOGC = 3,
+   FRAG_ATTRIB_TEX0 = 4,
+   FRAG_ATTRIB_TEX1 = 5,
+   FRAG_ATTRIB_TEX2 = 6,
+   FRAG_ATTRIB_TEX3 = 7,
+   FRAG_ATTRIB_TEX4 = 8,
+   FRAG_ATTRIB_TEX5 = 9,
+   FRAG_ATTRIB_TEX6 = 10,
+   FRAG_ATTRIB_TEX7 = 11,
+   FRAG_ATTRIB_VAR0 = 12,  /**< shader varying */
+   FRAG_ATTRIB_MAX = (FRAG_ATTRIB_VAR0 + MAX_VARYING)  /**< one past the last varying */
+};
+
+/**
+ * Bitflags for fragment program input attributes.
+ * Each flag is (1 << FRAG_ATTRIB_x); the TEXn flags are consecutive bits,
+ * so FRAG_BIT_TEX0 << n is the flag for texcoord n.
+ */
+/*@{*/
+#define FRAG_BIT_WPOS  (1 << FRAG_ATTRIB_WPOS)
+#define FRAG_BIT_COL0  (1 << FRAG_ATTRIB_COL0)
+#define FRAG_BIT_COL1  (1 << FRAG_ATTRIB_COL1)
+#define FRAG_BIT_FOGC  (1 << FRAG_ATTRIB_FOGC)
+#define FRAG_BIT_TEX0  (1 << FRAG_ATTRIB_TEX0)
+#define FRAG_BIT_TEX1  (1 << FRAG_ATTRIB_TEX1)
+#define FRAG_BIT_TEX2  (1 << FRAG_ATTRIB_TEX2)
+#define FRAG_BIT_TEX3  (1 << FRAG_ATTRIB_TEX3)
+#define FRAG_BIT_TEX4  (1 << FRAG_ATTRIB_TEX4)
+#define FRAG_BIT_TEX5  (1 << FRAG_ATTRIB_TEX5)
+#define FRAG_BIT_TEX6  (1 << FRAG_ATTRIB_TEX6)
+#define FRAG_BIT_TEX7  (1 << FRAG_ATTRIB_TEX7)
+#define FRAG_BIT_VAR0  (1 << FRAG_ATTRIB_VAR0)
+/*@}*/
+
+/* Number of FRAG_RESULT slots reserved from FRAG_RESULT_DATA0 onwards. */
+#define MAX_DRAW_BUFFERS 4
+
+/**
+ * Indices of the fragment program results.
+ */
+enum
+{
+   FRAG_RESULT_COLR = 0,
+   FRAG_RESULT_COLH = 1,
+   FRAG_RESULT_DEPR = 2,
+   FRAG_RESULT_DATA0 = 3,
+   FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS)  /**< one past the last DATA slot */
+};
+
+
+
+#if 1 /*XXX temp */
+/* Hardware version of a parsed fragment program.  "Derived" from the
+ * mesa fragment_program struct.
+ *
+ * Filled in by i915_compile_fragment_program().
+ */
+struct i915_fragment_program
+{
+#if 0
+   struct gl_fragment_program Base;
+#else
+   /* Instruction statistics written by i915_fini_compile(); all four are
+    * zeroed when compilation failed.
+    */
+   uint NumNativeInstructions;      /* ALU + TEX + DECL instructions */
+   uint NumNativeAluInstructions;
+   uint NumNativeTexInstructions;
+   uint NumNativeTexIndirections;
+#endif
+
+   boolean error;      /**< Set if i915_program_error() is called */
+#if 0
+   uint id;            /**< String id */
+   boolean translated;
+#endif
+
+   /* Decls + instructions:
+    * program_size is the number of uints of program[] in use.
+    */
+   uint program[I915_PROGRAM_SIZE];
+   uint program_size;
+   
+#if 0
+   /* Constant buffer:
+    */
+   float constant[I915_MAX_CONSTANT][4];
+   uint nr_constants;
+#endif
+
+   /* Some of which are parameters: 
+    */
+   struct
+   {
+      uint reg;               /* Hardware constant idx */
+      const float *values;    /* Pointer to tracked values */
+   } param[I915_MAX_CONSTANT];
+   uint nr_params;            /* reset to 0 when a compile starts */
+
+#if 0
+   uint param_state;
+#endif
+   /* Texcoord slot chosen to carry WPOS.  The compiler stores -1 here for
+    * "none"; as the field is unsigned that reads back as the maximum uint.
+    */
+   uint wpos_tex;
+};
+#endif
+
+
+/***********************************************************************
+ * Public interface for the compiler
+ */
+
+/* Compile the fragment shader held in i915 into fp; failure is reported
+ * through fp->error.
+ */
+void i915_compile_fragment_program( struct i915_context *i915,
+                                   struct i915_fragment_program *fp );
+
+
+/***********************************************************************
+ * Private details of the compiler
+ */
+
+/* Transient state for one fragment program compilation.  Created by
+ * i915_init_compile() and freed by i915_fini_compile().
+ */
+struct i915_fp_compile {
+   struct i915_fragment_program *fp;   /* Program being filled in */
+
+   struct pipe_shader_state *shader;   /* Source shader; inputs_read is
+                                        * consulted when placing WPOS.
+                                        */
+
+   /* Declarations and instructions are built in separate buffers and
+    * concatenated into fp->program at the end of the compile.
+    */
+   uint declarations[I915_PROGRAM_SIZE];
+   uint program[I915_PROGRAM_SIZE];
+
+   uint constant_flags[I915_MAX_CONSTANT];   /* Per constant register: bit
+                                              * idx set when component idx
+                                              * holds a value.
+                                              */
+
+   struct pipe_constant_buffer *constants;
+
+   uint *csr;                 /* Cursor, points into program.
+                                 */
+
+   uint *decl;                /* Cursor, points into declarations.
+                                 */
+
+   uint decl_s;               /* bitmask of S regs already declared by
+                                 * i915_emit_decl()
+                                 */
+   uint decl_t;               /* bitmask of T regs already declared by
+                                 * i915_emit_decl()
+                                 */
+
+   uint temp_flag;            /* Tracks temporary regs which are in
+                                 * use.
+                                 */
+
+   uint utemp_flag;           /* Tracks TYPE_U temporary regs which are in
+                                 * use.
+                                 */
+
+   /* Running counts, checked against the I915_MAX_* limits when the
+    * compile is finished.
+    */
+   uint nr_tex_indirect;
+   uint nr_tex_insn;
+   uint nr_alu_insn;
+   uint nr_decl_insn;
+
+#if 0
+   float (*env_param)[4];
+#endif
+};
+
+
+/* Bit layout of a "ureg", the compiler's 32-bit register reference:
+ *
+ *   bits 29..   register type
+ *   bits 24..   register number
+ *   bits 20..23 X channel (negate flag in bit 23, source select below it)
+ *   bits 16..19 Y channel (negate flag in bit 19)
+ *   bits 12..15 Z channel (negate flag in bit 15)
+ *   bits  8..11 W channel (negate flag in bit 11)
+ *   bits  4..7  constant ZERO selector
+ *   bits  0..3  constant ONE selector
+ *
+ * Having zero and one in here makes the definition of swizzle a lot
+ * easier.
+ */
+#define UREG_TYPE_SHIFT               29
+#define UREG_NR_SHIFT                 24
+#define UREG_CHANNEL_X_NEGATE_SHIFT   23
+#define UREG_CHANNEL_X_SHIFT          20
+#define UREG_CHANNEL_Y_NEGATE_SHIFT   19
+#define UREG_CHANNEL_Y_SHIFT          16
+#define UREG_CHANNEL_Z_NEGATE_SHIFT   15
+#define UREG_CHANNEL_Z_SHIFT          12
+#define UREG_CHANNEL_W_NEGATE_SHIFT   11
+#define UREG_CHANNEL_W_SHIFT          8
+#define UREG_CHANNEL_ZERO_NEGATE_MBZ  5
+#define UREG_CHANNEL_ZERO_SHIFT       4
+#define UREG_CHANNEL_ONE_NEGATE_MBZ   1
+#define UREG_CHANNEL_ONE_SHIFT        0
+
+#define UREG_BAD          0xffffffff    /* not a valid ureg */
+
+/* Short names for the channel source selectors. */
+#define X    SRC_X
+#define Y    SRC_Y
+#define Z    SRC_Z
+#define W    SRC_W
+#define ZERO SRC_ZERO
+#define ONE  SRC_ONE
+
+/* Construct a ureg:
+ * the given type and number with the identity swizzle (X, Y, Z, W) and
+ * the ZERO / ONE selectors stored in their dedicated fields.
+ */
+#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) |         \
+                         ((nr)  << UREG_NR_SHIFT) |            \
+                         (X     << UREG_CHANNEL_X_SHIFT) |     \
+                         (Y     << UREG_CHANNEL_Y_SHIFT) |     \
+                         (Z     << UREG_CHANNEL_Z_SHIFT) |     \
+                         (W     << UREG_CHANNEL_W_SHIFT) |     \
+                         (ZERO  << UREG_CHANNEL_ZERO_SHIFT) |  \
+                         (ONE   << UREG_CHANNEL_ONE_SHIFT))
+
+/* GET_CHANNEL_SRC moves the 4-bit field of selector 'channel' up into the
+ * X position (bits 20..23); CHANNEL_SRC moves a field sitting in the X
+ * position down to the position of 'channel'.  Arguments are fully
+ * parenthesised so that expressions can be passed safely.
+ */
+#define GET_CHANNEL_SRC( reg, channel ) \
+   (((reg) << ((channel) * 4)) & (0xf << UREG_CHANNEL_X_SHIFT))
+#define CHANNEL_SRC( src, channel ) ((src) >> ((channel) * 4))
+
+/* Extract the register type / number from a ureg. */
+#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)&REG_TYPE_MASK)
+#define GET_UREG_NR(reg)   (((reg)>>UREG_NR_SHIFT)&REG_NR_MASK)
+
+
+
+/* The four X/Y/Z/W channel fields (bits 8..23). */
+#define UREG_XYZW_CHANNEL_MASK 0x00ffff00
+
+/* One neat thing about the UREG representation: a swizzle is just a
+ * shuffle of its 4-bit channel fields.
+ *
+ * Returns reg with its X, Y, Z and W channel fields replaced by the fields
+ * that x, y, z and w select from the incoming reg.  Each selector is one
+ * of X, Y, Z, W, ZERO or ONE; the negate flag stored with a selected
+ * field moves along with it.  All bits outside the XYZW fields are kept.
+ */
+static INLINE int
+swizzle(int reg, int x, int y, int z, int w)
+{
+   /* ZERO and ONE are legal selectors as well as X..W: the constant
+    * emitters call swizzle(..., ZERO, ZERO, ZERO, ZERO) and friends, and
+    * the ureg carries dedicated ZERO/ONE fields for exactly that purpose.
+    */
+   assert(x >= 0 && x <= ONE);
+   assert(y >= 0 && y <= ONE);
+   assert(z >= 0 && z <= ONE);
+   assert(w >= 0 && w <= ONE);
+   return ((reg & ~UREG_XYZW_CHANNEL_MASK) |
+           CHANNEL_SRC(GET_CHANNEL_SRC(reg, x), 0) |
+           CHANNEL_SRC(GET_CHANNEL_SRC(reg, y), 1) |
+           CHANNEL_SRC(GET_CHANNEL_SRC(reg, z), 2) |
+           CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3));
+}
+
+/* Another neat thing about the UREG representation: negation is a bit
+ * flip.
+ *
+ * Toggles the negate flag of every channel whose argument has its lowest
+ * bit set and returns the resulting ureg.
+ */
+static INLINE int
+negate(int reg, int x, int y, int z, int w)
+{
+   int toggle = 0;
+
+   toggle |= (x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT;
+   toggle |= (y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT;
+   toggle |= (z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT;
+   toggle |= (w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT;
+
+   return reg ^ toggle;
+}
+
+
+/* Register allocation.  The get functions return a ureg for a free
+ * register, or report a program error and return 0 when none is left;
+ * i915_release_utemps() makes all utemps available again.
+ */
+extern uint i915_get_temp(struct i915_fp_compile *p);
+extern uint i915_get_utemp(struct i915_fp_compile *p);
+extern void i915_release_utemps(struct i915_fp_compile *p);
+
+
+/* Append a texture instruction 'op' sampling 'sampler' at 'coord' into
+ * 'dest'.  Returns dest, or 0 after reporting a program error.
+ */
+extern uint i915_emit_texld(struct i915_fp_compile *p,
+                              uint dest,
+                              uint destmask,
+                              uint sampler, uint coord, uint op);
+
+/* Append an arithmetic instruction.  Returns the destination ureg. */
+extern uint i915_emit_arith(struct i915_fp_compile *p,
+                              uint op,
+                              uint dest,
+                              uint mask,
+                              uint saturate,
+                              uint src0, uint src1, uint src2);
+
+/* Declare register (type, nr) if it has not been declared yet.  Returns
+ * the ureg naming that register.
+ */
+extern uint i915_emit_decl(struct i915_fp_compile *p,
+                             uint type, uint nr, uint d0_flags);
+
+
+/* Constant emitters: return a ureg that reads the requested value(s). */
+extern uint i915_emit_const1f(struct i915_fp_compile *p, float c0);
+
+extern uint i915_emit_const2f(struct i915_fp_compile *p,
+                                float c0, float c1);
+
+extern uint i915_emit_const4fv(struct i915_fp_compile *p,
+                                 const float * c);
+
+extern uint i915_emit_const4f(struct i915_fp_compile *p,
+                                float c0, float c1,
+                                float c2, float c3);
+
+
+#if 0
+extern uint i915_emit_param4fv(struct i915_fp_compile *p,
+                                 const float * values);
+#endif
+
+
+
+/*======================================================================
+ * i915_fpc.c
+ */
+extern void i915_program_error(struct i915_fp_compile *p,
+                               const char *msg);
+
+
+/*======================================================================
+ * i915_fpc_debug.c
+ */
+extern void i915_disassemble_program(const uint * program, uint sz);
+
+#if 0
+extern void i915_print_mesa_instructions( const struct prog_instruction *insn,
+                                         uint nr );
+#endif
+
+/*======================================================================
+ * i915_fpc_translate.c
+ */
+void i915_fixup_depth_write(struct i915_fp_compile *p);
+
+extern void
+i915_translate_program(struct i915_fp_compile *p, const struct tgsi_token *token);
+
+
+
+#endif
diff --git a/src/mesa/pipe/i915simple/i915_fpc_debug.c b/src/mesa/pipe/i915simple/i915_fpc_debug.c
new file mode 100644 (file)
index 0000000..77deab3
--- /dev/null
@@ -0,0 +1,346 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#if 0
+#include <stdio.h>
+#endif
+
+#include "i915_reg.h"
+#include "i915_fpc.h"
+
+#if 0
+#include "shader/program.h"
+#include "shader/prog_instruction.h"
+#include "shader/prog_print.h"
+#endif
+
+/* Mnemonic for each value of the 5-bit opcode field (bits 24..28 of the
+ * first instruction dword).  Values without a name print as their hex
+ * number.
+ */
+static const char *opcodes[0x20] = {
+   "NOP",
+   "ADD",
+   "MOV",
+   "MUL",
+   "MAD",
+   "DP2ADD",
+   "DP3",
+   "DP4",
+   "FRC",
+   "RCP",
+   "RSQ",
+   "EXP",
+   "LOG",
+   "CMP",
+   "MIN",
+   "MAX",
+   "FLR",
+   "MOD",
+   "TRC",
+   "SGE",
+   "SLT",
+   "TEXLD",
+   "TEXLDP",
+   "TEXLDB",
+   "TEXKILL",
+   "DCL",
+   "0x1a",
+   "0x1b",
+   "0x1c",
+   "0x1d",
+   "0x1e",
+   "0x1f",
+};
+
+
+/* Number of source operands per opcode, indexed like opcodes[].
+ * print_arith_op() prints one operand for a count of 1, two for a count
+ * of 2 and all three otherwise.
+ */
+static const int args[0x20] = {
+   0,                           /* 0 nop */
+   2,                           /* 1 add */
+   1,                           /* 2 mov */
+   2,                           /* 3 mul */
+   3,                           /* 4 mad */
+   3,                           /* 5 dp2add */
+   2,                           /* 6 dp3 */
+   2,                           /* 7 dp4 */
+   1,                           /* 8 frc */
+   1,                           /* 9 rcp */
+   1,                           /* a rsq */
+   1,                           /* b exp */
+   1,                           /* c log */
+   3,                           /* d cmp */
+   2,                           /* e min */
+   2,                           /* f max */
+   1,                           /* 10 flr */
+   1,                           /* 11 mod */
+   1,                           /* 12 trc */
+   2,                           /* 13 sge */
+   2,                           /* 14 slt */
+   1,                           /* 15 texld */
+   1,                           /* 16 texldp */
+   1,                           /* 17 texldb */
+   1,                           /* 18 texkill */
+   0,                           /* 19 dcl */
+   0,                           /* 1a */
+   0,                           /* 1b */
+   0,                           /* 1c */
+   0,                           /* 1d */
+   0,                           /* 1e */
+   0,                           /* 1f */
+};
+
+
+/* Printable name of each register type, indexed by the register type
+ * value; used by print_reg_type_nr() for the generic "NAME[nr]" form.
+ */
+static const char *regname[0x8] = {
+   "R",
+   "T",
+   "CONST",
+   "S",
+   "OC",
+   "OD",
+   "U",
+   "UNKNOWN",
+};
+
+/* Print a register given its type and number.
+ *
+ * T registers print as T_DIFFUSE, T_SPECULAR, T_FOG_W or T_TEX<nr>;
+ * register 0 of the OC and OD types prints as "oC" / "oD"; everything
+ * else prints as NAME[nr] with NAME taken from regname[].
+ *
+ * nr is unsigned, so it is formatted with %u.
+ */
+static void
+print_reg_type_nr(uint type, uint nr)
+{
+   switch (type) {
+   case REG_TYPE_T:
+      switch (nr) {
+      case T_DIFFUSE:
+         printf("T_DIFFUSE");
+         return;
+      case T_SPECULAR:
+         printf("T_SPECULAR");
+         return;
+      case T_FOG_W:
+         printf("T_FOG_W");
+         return;
+      default:
+         printf("T_TEX%u", nr);
+         return;
+      }
+   case REG_TYPE_OC:
+      if (nr == 0) {
+         printf("oC");
+         return;
+      }
+      break;
+   case REG_TYPE_OD:
+      if (nr == 0) {
+         printf("oD");
+         return;
+      }
+      break;
+   default:
+      break;
+   }
+
+   /* Generic form, also reached by OC/OD registers other than 0. */
+   printf("%s[%u]", regname[type], nr);
+}
+
+/* The low 16 bits of a source operand hold four 4-bit channel fields:
+ * the low three bits of each select the source, the top bit negates.
+ */
+#define REG_SWIZZLE_MASK 0x7777
+#define REG_NEGATE_MASK 0x8888
+
+/* Swizzle bits of the identity mapping; print_reg_neg_swizzle() prints
+ * nothing for an operand that has this swizzle and no negate bits.
+ */
+#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) | \
+                     (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) |      \
+                     (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) |      \
+                     (SRC_W << A2_SRC2_CHANNEL_W_SHIFT))
+
+
+/* Print the swizzle/negate suffix of a source operand.
+ *
+ * Prints nothing for the plain identity swizzle without negation.
+ * Otherwise prints "." followed by one entry per channel, taken from the
+ * highest nibble of the low 16 bits down to the lowest: an optional "-"
+ * for a negated channel, then x, y, z, w, 0 or 1 ("?" for any other
+ * selector value).
+ */
+static void
+print_reg_neg_swizzle(uint reg)
+{
+   static const char selector_name[8] = {
+      'x', 'y', 'z', 'w', '0', '1', '?', '?'
+   };
+   int shift;
+
+   if ((reg & REG_NEGATE_MASK) == 0 &&
+       (reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW)
+      return;
+
+   printf(".");
+
+   for (shift = 12; shift >= 0; shift -= 4) {
+      const uint field = reg >> shift;
+
+      if (field & 0x8)
+         printf("-");
+
+      printf("%c", selector_name[field & 0x7]);
+   }
+}
+
+
+/* Print a source operand laid out like the SRC2 field of the third
+ * instruction dword: register name first, swizzle/negate suffix after.
+ */
+static void
+print_src_reg(uint dword)
+{
+   print_reg_type_nr((dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK,
+                     (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK);
+   print_reg_neg_swizzle(dword);
+}
+
+
+/* Print the destination register encoded in the first instruction dword,
+ * followed by a ".xyzw"-style write mask unless all four channels are
+ * written.
+ */
+static void
+print_dest_reg(uint dword)
+{
+   const uint channel_bit[4] = {
+      A0_DEST_CHANNEL_X, A0_DEST_CHANNEL_Y,
+      A0_DEST_CHANNEL_Z, A0_DEST_CHANNEL_W
+   };
+   const char channel_name[4] = { 'x', 'y', 'z', 'w' };
+   int c;
+
+   print_reg_type_nr((dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK,
+                     (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK);
+
+   if ((dword & A0_DEST_CHANNEL_ALL) != A0_DEST_CHANNEL_ALL) {
+      printf(".");
+      for (c = 0; c < 4; c++) {
+         if (dword & channel_bit[c])
+            printf("%c", channel_name[c]);
+      }
+   }
+}
+
+
+/* Reassemble the three source operands of an arithmetic instruction into
+ * the SRC2 layout expected by print_src_reg().  SRC0 straddles dwords 0
+ * and 1, SRC1 straddles dwords 1 and 2, SRC2 sits in dword 2 as is.
+ * Arguments are parenthesised so that any expression can be passed.
+ */
+#define GET_SRC0_REG(r0, r1) (((r0)<<14)|((r1)>>A1_SRC0_CHANNEL_W_SHIFT))
+#define GET_SRC1_REG(r0, r1) (((r0)<<8)|((r1)>>A2_SRC1_CHANNEL_W_SHIFT))
+#define GET_SRC2_REG(r)      (r)
+
+
+/* Print one three-dword arithmetic instruction as
+ * "dest = [SATURATE ]OP src0[, src1[, src2]]".
+ *
+ * The destination part is omitted for NOP.  One operand is printed when
+ * args[opcode] is 1, two when it is 2, and all three for any other count.
+ */
+static void
+print_arith_op(uint opcode, const uint * program)
+{
+   const int nargs = args[opcode];
+   const int nprinted = (nargs == 1 || nargs == 2) ? nargs : 3;
+
+   if (opcode != A0_NOP) {
+      print_dest_reg(program[0]);
+      printf((program[0] & A0_DEST_SATURATE) ? " = SATURATE " : " = ");
+   }
+
+   printf("%s ", opcodes[opcode]);
+
+   print_src_reg(GET_SRC0_REG(program[0], program[1]));
+
+   if (nprinted >= 2) {
+      printf(", ");
+      print_src_reg(GET_SRC1_REG(program[1], program[2]));
+   }
+
+   if (nprinted >= 3) {
+      printf(", ");
+      print_src_reg(GET_SRC2_REG(program[2]));
+   }
+
+   printf("\n");
+}
+
+
+/* Print one texture instruction as "dest = OP S[sampler],coordreg".
+ * The destination is always shown without a write mask.
+ */
+static void
+print_tex_op(uint opcode, const uint * program)
+{
+   const uint dword0 = program[0];
+   const uint dword1 = program[1];
+   const uint coord_type =
+      (dword1 >> T1_ADDRESS_REG_TYPE_SHIFT) & REG_TYPE_MASK;
+   const uint coord_nr =
+      (dword1 >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK;
+
+   print_dest_reg(dword0 | A0_DEST_CHANNEL_ALL);
+   printf(" = %s ", opcodes[opcode]);
+   printf("S[%d],", dword0 & T0_SAMPLER_NR_MASK);
+   print_reg_type_nr(coord_type, coord_nr);
+   printf("\n");
+}
+
+/* Print one declaration as "DCL reg"; the register is shown without a
+ * write mask.
+ */
+static void
+print_dcl_op(uint opcode, const uint * program)
+{
+   const uint declared = program[0] | A0_DEST_CHANNEL_ALL;
+
+   printf("%s ", opcodes[opcode]);
+   print_dest_reg(declared);
+   printf("\n");
+}
+
+
+/* Print a human readable listing of a compiled fragment program to
+ * stdout, between BEGIN and END markers.
+ *
+ * program points at sz dwords: one header dword whose low 9 bits hold
+ * sz - 2, followed by three-dword instructions.  An empty or NULL program
+ * prints only the markers, and a trailing partial instruction is not
+ * decoded, so the listing never reads past program[sz - 1].
+ */
+void
+i915_disassemble_program(const uint * program, uint sz)
+{
+   uint size;
+   uint i;
+
+   printf("\t\tBEGIN\n");
+
+   if (program == NULL || sz == 0) {
+      printf("\t\tEND\n\n");
+      return;
+   }
+
+   size = program[0] & 0x1ff;
+   assert(size + 2 == sz);
+   (void) size;                 /* only used by the assert */
+
+   /* Skip the header; i counts dwords consumed so far. */
+   program++;
+   for (i = 1; i + 3 <= sz; i += 3, program += 3) {
+      uint opcode = program[0] & (0x1f << 24);
+
+      printf("\t\t");
+
+      if ((int) opcode >= A0_NOP && opcode <= A0_SLT)
+         print_arith_op(opcode >> 24, program);
+      else if (opcode >= T0_TEXLD && opcode <= T0_TEXKILL)
+         print_tex_op(opcode >> 24, program);
+      else if (opcode == D0_DCL)
+         print_dcl_op(opcode >> 24, program);
+      else
+         printf("Unknown opcode 0x%x\n", opcode);
+   }
+
+   printf("\t\tEND\n\n");
+}
+
+
+/* Disabled: prints nr instructions, one per line, each prefixed with its
+ * index.
+ */
+#if 0
+void i915_print_mesa_instructions( const struct prog_instruction *insn,
+                                  uint nr )
+{
+   uint i;
+   for (i = 0; i < nr; i++, insn++) {
+      printf("%3d: ", i);
+      print_instruction(insn);
+   }
+}
+#endif
diff --git a/src/mesa/pipe/i915simple/i915_fpc_emit.c b/src/mesa/pipe/i915simple/i915_fpc_emit.c
new file mode 100644 (file)
index 0000000..7259bb5
--- /dev/null
@@ -0,0 +1,430 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#if 0
+#include <strings.h>
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+#endif
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_fpc.h"
+
+
+/* Pack a ureg into the named operand field of an instruction dword.
+ * A0/A1/A2 are the three dwords of an arithmetic instruction; SRC0 is
+ * split across A0 and A1, SRC1 across A1 and A2.  The DEST variants keep
+ * only the register type and number.
+ */
+#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
+#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
+#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
+#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT)
+#define A1_SRC0( reg ) (((reg)&UREG_MASK)<<UREG_A1_SRC0_SHIFT_RIGHT)
+#define A1_SRC1( reg ) (((reg)&UREG_MASK)>>UREG_A1_SRC1_SHIFT_LEFT)
+#define A2_SRC1( reg ) (((reg)&UREG_MASK)<<UREG_A2_SRC1_SHIFT_RIGHT)
+#define A2_SRC2( reg ) (((reg)&UREG_MASK)>>UREG_A2_SRC2_SHIFT_LEFT)
+
+/* These are special, and don't have swizzle/negate bits.
+ */
+#define T0_SAMPLER( reg )     (GET_UREG_NR(reg)<<T0_SAMPLER_NR_SHIFT)
+#define T1_ADDRESS_REG( reg ) ((GET_UREG_NR(reg)<<T1_ADDRESS_REG_NR_SHIFT) | \
+                              (GET_UREG_TYPE(reg)<<T1_ADDRESS_REG_TYPE_SHIFT))
+
+
+/* Macros for translating UREG's into the various register fields used
+ * by the I915 programmable unit.
+ * Each value is the distance between a field's position in the ureg and
+ * its position in the instruction dword.
+ */
+#define UREG_A0_DEST_SHIFT_LEFT  (UREG_TYPE_SHIFT - A0_DEST_TYPE_SHIFT)
+#define UREG_A0_SRC0_SHIFT_LEFT  (UREG_TYPE_SHIFT - A0_SRC0_TYPE_SHIFT)
+#define UREG_A1_SRC0_SHIFT_RIGHT (A1_SRC0_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT)
+#define UREG_A1_SRC1_SHIFT_LEFT  (UREG_TYPE_SHIFT - A1_SRC1_TYPE_SHIFT)
+#define UREG_A2_SRC1_SHIFT_RIGHT (A2_SRC1_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT)
+#define UREG_A2_SRC2_SHIFT_LEFT  (UREG_TYPE_SHIFT - A2_SRC2_TYPE_SHIFT)
+
+/* Everything in a ureg except the ZERO/ONE selector fields in the low byte. */
+#define UREG_MASK         0xffffff00
+#define UREG_TYPE_NR_MASK ((REG_TYPE_MASK << UREG_TYPE_SHIFT) | \
+                          (REG_NR_MASK << UREG_NR_SHIFT))
+
+
+/* constant_flags[] value marking a constant register that
+ * i915_emit_const1f() must skip.
+ */
+#define I915_CONSTFLAG_PARAM 0x1f
+
+/* Allocate a temporary (R) register.
+ *
+ * Picks the lowest register whose bit is clear in p->temp_flag, marks it
+ * as in use and returns its ureg.  When every bit is set, a program error
+ * is reported and 0 is returned.
+ *
+ * NOTE(review): the initial mask 0xffff000 leaves bits 28..31 clear, so
+ * once registers 0..11 are taken this hands out numbers 28..31 -- confirm
+ * that is intended.
+ */
+uint
+i915_get_temp(struct i915_fp_compile *p)
+{
+   int bit = ffs(~p->temp_flag);
+   if (!bit) {
+      i915_program_error(p, "i915_get_temp: out of temporaries\n");
+      return 0;
+   }
+
+   /* Unsigned one: bit - 1 can be 31, and "1 << 31" overflows a signed
+    * int.
+    */
+   p->temp_flag |= 1u << (bit - 1);
+   return UREG(REG_TYPE_R, (bit - 1));
+}
+
+
+/* Allocate a TYPE_U temporary register.
+ *
+ * Picks the lowest register whose bit is clear in p->utemp_flag, marks it
+ * as in use and returns its ureg.  When every bit is set, a program error
+ * is reported and 0 is returned.
+ */
+uint
+i915_get_utemp(struct i915_fp_compile * p)
+{
+   const int first_free = ffs(~p->utemp_flag);
+
+   if (first_free == 0) {
+      i915_program_error(p, "i915_get_utemp: out of temporaries\n");
+      return 0;
+   }
+   else {
+      const int nr = first_free - 1;
+
+      p->utemp_flag |= 1 << nr;
+      return UREG(REG_TYPE_U, nr);
+   }
+}
+
+/* Make all TYPE_U temporaries available again by resetting the
+ * allocation mask to its initial value (only bits 0..2 clear, i.e. three
+ * allocatable registers).
+ */
+void
+i915_release_utemps(struct i915_fp_compile *p)
+{
+   p->utemp_flag = ~0x7;
+}
+
+
+/* Declare register (type, nr) unless it has been declared already.
+ *
+ * Only T and S registers are declared; for any other type the ureg is
+ * returned straight away.  A new declaration appends three dwords to the
+ * declaration buffer and bumps the declaration count.
+ *
+ * Returns the ureg naming the register in every case.
+ */
+uint
+i915_emit_decl(struct i915_fp_compile *p,
+               uint type, uint nr, uint d0_flags)
+{
+   const uint reg = UREG(type, nr);
+   uint *declared;
+
+   /* Select the "already declared" bitmask that matches the type. */
+   if (type == REG_TYPE_T)
+      declared = &p->decl_t;
+   else if (type == REG_TYPE_S)
+      declared = &p->decl_s;
+   else
+      return reg;
+
+   if (*declared & (1 << nr))
+      return reg;
+   *declared |= (1 << nr);
+
+   p->decl[0] = (D0_DCL | D0_DEST(reg) | d0_flags);
+   p->decl[1] = D1_MBZ;
+   p->decl[2] = D2_MBZ;
+   p->decl += 3;
+
+   p->nr_decl_insn += 1;
+   return reg;
+}
+
+/* Append one three-dword arithmetic instruction to the program.
+ *
+ * op, mask and saturate are OR'ed into the first dword as given.  dest is
+ * reduced to its register type and number before it is encoded.  When
+ * more than one source is a CONST register, every such source whose
+ * register number differs from the first one is copied into a utemp by an
+ * extra MOV emitted ahead of this instruction.
+ *
+ * Returns the (reduced) destination ureg.
+ */
+uint
+i915_emit_arith(struct i915_fp_compile * p,
+                uint op,
+                uint dest,
+                uint mask,
+                uint saturate, uint src0, uint src1, uint src2)
+{
+   uint c[3];          /* indices (0..2) of the sources that are constants */
+   uint nr_const = 0;
+
+   assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
+   dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest));
+   assert(dest);
+
+   if (GET_UREG_TYPE(src0) == REG_TYPE_CONST)
+      c[nr_const++] = 0;
+   if (GET_UREG_TYPE(src1) == REG_TYPE_CONST)
+      c[nr_const++] = 1;
+   if (GET_UREG_TYPE(src2) == REG_TYPE_CONST)
+      c[nr_const++] = 2;
+
+   /* Recursively call this function to MOV additional const values
+    * into temporary registers.  Use utemp registers for this -
+    * currently shouldn't be possible to run out, but keep an eye on
+    * this.
+    */
+   if (nr_const > 1) {
+      uint s[3], first, i, old_utemp_flag;
+
+      s[0] = src0;
+      s[1] = src1;
+      s[2] = src2;
+      old_utemp_flag = p->utemp_flag;
+
+      /* Constants sharing the first constant's register number stay as
+       * they are; the others are replaced by utemp copies.
+       */
+      first = GET_UREG_NR(s[c[0]]);
+      for (i = 1; i < nr_const; i++) {
+         if (GET_UREG_NR(s[c[i]]) != first) {
+            uint tmp = i915_get_utemp(p);
+
+            i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
+                            s[c[i]], 0, 0);
+            s[c[i]] = tmp;
+         }
+      }
+
+      src0 = s[0];
+      src1 = s[1];
+      src2 = s[2];
+      /* The utemps are only needed by the instruction emitted below, so
+       * the allocation mask is put back right away.
+       */
+      p->utemp_flag = old_utemp_flag;   /* restore */
+   }
+
+   *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0));
+   *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1));
+   *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2));
+
+   p->nr_alu_insn++;
+   return dest;
+}
+
+/* Append a texture instruction 'op' that samples 'sampler' at 'coord'
+ * and writes 'dest'.
+ *
+ * coord must be an unswizzled, un-negated register; anything else is
+ * reported as a program error and 0 is returned.  When destmask does not
+ * cover all channels, the texel is first loaded into a utemp and then
+ * MOVed into dest under destmask.  A coord that is not a T register
+ * counts as one more texture indirection.
+ *
+ * Returns the destination ureg (reduced to type and number on the direct
+ * path), or 0 on error.
+ */
+uint i915_emit_texld( struct i915_fp_compile *p,
+                       uint dest,
+                       uint destmask,
+                       uint sampler,
+                       uint coord,
+                       uint op )
+{
+   if (coord != UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord))) {
+      /* No real way to work around this in the general case - need to
+       * allocate and declare a new temporary register (a utemp won't
+       * do).  Will fallback for now.
+       */
+      i915_program_error(p, "Can't (yet) swizzle TEX arguments");
+      return 0;
+   }
+
+   /* Saturate is not handled here.
+    * NOTE(review): the original comment was cut short ("Don't worry about
+    * saturate as we only support"); presumably it referred to texture
+    * formats whose values already lie in [0,1] -- confirm.
+    */
+   if (destmask != A0_DEST_CHANNEL_ALL) {
+      uint tmp = i915_get_utemp(p);
+      i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op );
+      i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 );
+      return dest;
+   }
+   else {
+      assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
+
+      /* Reduce dest to type + number outside of assert(), so that debug
+       * and NDEBUG builds return the same value (same pattern as
+       * i915_emit_arith()).
+       */
+      dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest));
+      assert(dest);
+
+      if (GET_UREG_TYPE(coord) != REG_TYPE_T) {
+         p->nr_tex_indirect++;
+      }
+
+      *(p->csr++) = (op |
+                     T0_DEST( dest ) |
+                     T0_SAMPLER( sampler ));
+
+      *(p->csr++) = T1_ADDRESS_REG( coord );
+      *(p->csr++) = T2_MBZ;
+
+      p->nr_tex_insn++;
+      return dest;
+   }
+}
+
+
+/**
+ * Return a ureg that reads the scalar constant c0 in its X channel.
+ *
+ * 0.0 and 1.0 need no storage: they are expressed as ZERO/ONE swizzles
+ * of R0.  Any other value is placed in (or matched against) a single
+ * channel of a constant register that is not reserved as a parameter.
+ * Reports a program error and returns 0 when no channel is available.
+ */
+uint
+i915_emit_const1f(struct i915_fp_compile * p, float c0)
+{
+   int reg;
+
+   if (c0 == 0.0)
+      return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
+   if (c0 == 1.0)
+      return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);
+
+   for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
+      int chan;
+
+      if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
+         continue;
+
+      for (chan = 0; chan < 4; chan++) {
+         const int taken = (p->constant_flags[reg] & (1 << chan)) != 0;
+
+         /* An occupied channel is only usable if it already holds c0. */
+         if (taken && p->constants->constant[reg][chan] != c0)
+            continue;
+
+         p->constants->constant[reg][chan] = c0;
+         p->constant_flags[reg] |= 1 << chan;
+         if (reg + 1 > p->constants->nr_constants)
+            p->constants->nr_constants = reg + 1;
+         return swizzle(UREG(REG_TYPE_CONST, reg), chan, ZERO, ZERO, ONE);
+      }
+   }
+
+   i915_program_error(p, "i915_emit_const1f: out of constants\n");
+   return 0;
+}
+
+/**
+ * Return a ureg that reads c0 in X and c1 in Y.
+ *
+ * When either value is 0.0 or 1.0 the request collapses to a scalar
+ * constant combined with a ZERO/ONE swizzle.  Otherwise two adjacent
+ * free channels of a constant register are claimed.  Reports a program
+ * error and returns 0 when no such pair is available.
+ */
+uint
+i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1)
+{
+   int reg;
+
+   /* Trivial first component: only c1 needs storage. */
+   if (c0 == 0.0)
+      return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W);
+   if (c0 == 1.0)
+      return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W);
+
+   /* Trivial second component: only c0 needs storage. */
+   if (c1 == 0.0)
+      return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W);
+   if (c1 == 1.0)
+      return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);
+
+   for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
+      int first;
+
+      /* Skip registers that are full or reserved for parameters. */
+      if (p->constant_flags[reg] == 0xf ||
+          p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
+         continue;
+
+      for (first = 0; first < 3; first++) {
+         /* Both channels first and first+1 must be unused. */
+         if (p->constant_flags[reg] & (3 << first))
+            continue;
+
+         p->constants->constant[reg][first + 0] = c0;
+         p->constants->constant[reg][first + 1] = c1;
+         p->constant_flags[reg] |= 3 << first;
+         if (reg + 1 > p->constants->nr_constants)
+            p->constants->nr_constants = reg + 1;
+         return swizzle(UREG(REG_TYPE_CONST, reg), first, first + 1, ZERO,
+                        ONE);
+      }
+   }
+
+   i915_program_error(p, "i915_emit_const2f: out of constants\n");
+   return 0;
+}
+
+
+
+/**
+ * Return a ureg for a constant register holding (c0, c1, c2, c3).
+ *
+ * Registers are scanned in order: a completely unused register is
+ * claimed and filled, and a fully populated register is reused when all
+ * four components match.  Reports a program error and returns 0 when
+ * neither is found.
+ */
+uint
+i915_emit_const4f(struct i915_fp_compile * p,
+                  float c0, float c1, float c2, float c3)
+{
+   int reg;
+
+   for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
+      if (p->constant_flags[reg] == 0) {
+         /* Free register: claim all four channels. */
+         p->constants->constant[reg][0] = c0;
+         p->constants->constant[reg][1] = c1;
+         p->constants->constant[reg][2] = c2;
+         p->constants->constant[reg][3] = c3;
+         p->constant_flags[reg] = 0xf;
+         if (reg + 1 > p->constants->nr_constants)
+            p->constants->nr_constants = reg + 1;
+         return UREG(REG_TYPE_CONST, reg);
+      }
+
+      /* Only fully populated registers are candidates for reuse. */
+      if (p->constant_flags[reg] != 0xf)
+         continue;
+
+      if (p->constants->constant[reg][0] == c0 &&
+          p->constants->constant[reg][1] == c1 &&
+          p->constants->constant[reg][2] == c2 &&
+          p->constants->constant[reg][3] == c3) {
+         return UREG(REG_TYPE_CONST, reg);
+      }
+   }
+
+   i915_program_error(p, "i915_emit_const4f: out of constants\n");
+   return 0;
+}
+
+
+/**
+ * Array form of i915_emit_const4f(): c points at four floats.
+ */
+uint
+i915_emit_const4fv(struct i915_fp_compile * p, const float * c)
+{
+   const float x = c[0];
+   const float y = c[1];
+   const float z = c[2];
+   const float w = c[3];
+
+   return i915_emit_const4f(p, x, y, z, w);
+}
+
+
+#if 00000/*UNUSED*/
+/* Reserve a slot in the constant file for a Mesa state parameter.
+ * These will later need to be tracked on statechanges, but that is
+ * done elsewhere.
+ *
+ * If 'values' is already registered in fp->param[], the register
+ * recorded for it is returned.  Otherwise the next constant register
+ * is taken, flagged I915_CONSTFLAG_PARAM and recorded together with
+ * 'values'.  Reports a program error and returns 0 when the constant
+ * or parameter tables are full.
+ *
+ * NOTE: this whole function is currently compiled out.
+ */
+uint
+i915_emit_param4fv(struct i915_fp_compile * p, const float * values)
+{
+   struct i915_fragment_program *fp = p->fp;
+   int i;
+
+   for (i = 0; i < fp->nr_params; i++) {
+      if (fp->param[i].values == values)
+         return UREG(REG_TYPE_CONST, fp->param[i].reg);
+   }
+
+#if 0
+   if (fp->nr_constants == I915_MAX_CONSTANT ||
+       fp->nr_params == I915_MAX_CONSTANT) {
+#else
+   if (p->constants->nr_constants == I915_MAX_CONSTANT ||
+       fp->nr_params == I915_MAX_CONSTANT) {
+#endif
+      i915_program_error(p, "i915_emit_param4fv: out of constants\n");
+      return 0;
+   }
+
+   {
+#if 0
+      int reg = fp->nr_constants++;
+#else
+      int reg = p->constants->nr_constants++;
+#endif
+      int i = fp->nr_params++;
+
+      assert (p->constant_flags[reg] == 0);
+      p->constant_flags[reg] = I915_CONSTFLAG_PARAM;
+
+      fp->param[i].values = values;
+      fp->param[i].reg = reg;
+
+      return UREG(REG_TYPE_CONST, reg);
+   }
+}
+#endif
diff --git a/src/mesa/pipe/i915simple/i915_fpc_translate.c b/src/mesa/pipe/i915simple/i915_fpc_translate.c
new file mode 100644 (file)
index 0000000..a034e73
--- /dev/null
@@ -0,0 +1,838 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_fpc.h"
+
+#include "pipe/tgsi/core/tgsi_parse.h"
+
+
+/* 1, -1/3!, 1/5!, -1/7!
+ *
+ * Taylor-series coefficients for sin(x).  The SIN and SCS translations
+ * dot these (DP4) against the odd powers x, x^3, x^5, x^7.
+ */
+static const float sin_constants[4] = { 1.0,
+   -1.0 / (3 * 2 * 1),
+   1.0 / (5 * 4 * 3 * 2 * 1),
+   -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1)
+};
+
+/* 1, -1/2!, 1/4!, -1/6!
+ *
+ * Taylor-series coefficients for cos(x).  The COS and SCS translations
+ * dot these (DP4) against the even powers 1, x^2, x^4, x^6.
+ */
+static const float cos_constants[4] = { 1.0,
+   -1.0 / (2 * 1),
+   1.0 / (4 * 3 * 2 * 1),
+   -1.0 / (6 * 5 * 4 * 3 * 2 * 1)
+};
+
+
+/**
+ * Construct a ureg for the given source register.  Will emit
+ * constants, apply swizzling and negation as needed.
+ */
+static uint
+src_vector(struct i915_fp_compile *p,
+           const struct tgsi_full_src_register *source)
+{
+   const uint index = source->SrcRegister.Index;
+   uint src;
+
+   switch (source->SrcRegisterInd.File) {
+   case TGSI_FILE_TEMPORARY:
+      if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) {
+         i915_program_error(p, "Exceeded max temporary reg");
+         return 0;
+      }
+      src = UREG(REG_TYPE_R, index);
+      break;
+   case TGSI_FILE_INPUT:
+      /* XXX: Packing COL1, FOGC into a single attribute works for
+       * texenv programs, but will fail for real fragment programs
+       * that use these attributes and expect them to be a full 4
+       * components wide.  Could use a texcoord to pass these
+       * attributes if necessary, but that won't work in the general
+       * case.
+       * 
+       * We also use a texture coordinate to pass wpos when possible.
+       */
+      switch (index) {
+      case FRAG_ATTRIB_WPOS:
+         src = i915_emit_decl(p, REG_TYPE_T, p->fp->wpos_tex, D0_CHANNEL_ALL);
+         break;
+      case FRAG_ATTRIB_COL0:
+         src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
+         break;
+      case FRAG_ATTRIB_COL1:
+         src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
+         src = swizzle(src, X, Y, Z, ONE);
+         break;
+      case FRAG_ATTRIB_FOGC:
+         src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
+         src = swizzle(src, W, W, W, W);
+         break;
+      case FRAG_ATTRIB_TEX0:
+      case FRAG_ATTRIB_TEX1:
+      case FRAG_ATTRIB_TEX2:
+      case FRAG_ATTRIB_TEX3:
+      case FRAG_ATTRIB_TEX4:
+      case FRAG_ATTRIB_TEX5:
+      case FRAG_ATTRIB_TEX6:
+      case FRAG_ATTRIB_TEX7:
+         src = i915_emit_decl(p, REG_TYPE_T,
+                              T_TEX0 + (index - FRAG_ATTRIB_TEX0),
+                              D0_CHANNEL_ALL);
+         break;
+
+      default:
+         i915_program_error(p, "Bad source->Index");
+         return 0;
+      }
+      break;
+
+      /* Various parameters and env values.  All emitted to
+       * hardware as program constants.
+       */
+#if 0
+   case PROGRAM_LOCAL_PARAM:
+      src = i915_emit_param4fv(p, program->Base.LocalParams[index]);
+      break;
+   case PROGRAM_ENV_PARAM:
+      src = i915_emit_param4fv(p, p->env_param[index]);
+      break;
+   case PROGRAM_CONSTANT:
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_NAMED_PARAM:
+      src = i915_emit_param4fv(
+        p, program->Base.Parameters->ParameterValues[index]);
+      break;
+#else
+   case TGSI_FILE_CONSTANT:
+      src = UREG(REG_TYPE_CONST, index);
+      break;
+#endif
+
+   default:
+      i915_program_error(p, "Bad source->File");
+      return 0;
+   }
+
+   src = swizzle(src,
+                 source->SrcRegister.SwizzleX,
+                 source->SrcRegister.SwizzleY,
+                 source->SrcRegister.SwizzleZ,
+                 source->SrcRegister.SwizzleW);
+
+   assert(!source->SrcRegister.Negate);
+   assert(!source->SrcRegisterExtSwz.NegateX);
+   assert(!source->SrcRegisterExtSwz.NegateY);
+   assert(!source->SrcRegisterExtSwz.NegateZ);
+   assert(!source->SrcRegisterExtSwz.NegateW);
+   assert(!source->SrcRegisterExtMod.Absolute);
+   assert(!source->SrcRegisterExtMod.Negate);
+#if 0
+   if (source->SrcRegister.Negate)
+      negate all 
+
+   if (extended source swiz per component)
+      src = negate(src,
+                   source->SrcRegisterExtSwz.NegateX,
+                   source->SrcRegisterExtSwz.NegateY,
+                   source->SrcRegisterExtSwz.NegateZ,
+                   source->SrcRegisterExtSwz.NegateW);
+   if (mod.abs)
+      absolute value
+
+   if (mod.negate)
+      another negate;
+#endif
+   return src;
+}
+
+
+/**
+ * Map a TGSI destination register onto an i915 ureg.
+ *
+ * Temporaries map to R registers; the colour and depth outputs map to
+ * the OC and OD registers.  Anything else reports a program error and
+ * yields 0.
+ */
+static uint
+get_result_vector(struct i915_fp_compile *p,
+                  const struct tgsi_full_dst_register *dest)
+{
+   if (dest->DstRegister.File == TGSI_FILE_TEMPORARY)
+      return UREG(REG_TYPE_R, dest->DstRegister.Index);
+
+   if (dest->DstRegister.File != TGSI_FILE_OUTPUT) {
+      i915_program_error(p, "Bad inst->DstReg.File");
+      return 0;
+   }
+
+   /* Output file: only colour and depth results are known. */
+   if (dest->DstRegister.Index == FRAG_RESULT_COLR)
+      return UREG(REG_TYPE_OC, 0);
+   if (dest->DstRegister.Index == FRAG_RESULT_DEPR)
+      return UREG(REG_TYPE_OD, 0);
+
+   i915_program_error(p, "Bad inst->DstReg.Index");
+   return 0;
+}
+
+
+/**
+ * Build the A0_DEST_* flag word for an instruction's first destination:
+ * the saturate bit (for TGSI_SAT_ZERO_ONE) plus one channel bit per
+ * enabled write-mask component.
+ */
+static uint
+get_result_flags(const struct tgsi_full_instruction *inst)
+{
+   const uint mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+   uint result = 0x0;
+
+   if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE)
+      result |= A0_DEST_SATURATE;
+
+   result |= (mask & TGSI_WRITEMASK_X) ? A0_DEST_CHANNEL_X : 0;
+   result |= (mask & TGSI_WRITEMASK_Y) ? A0_DEST_CHANNEL_Y : 0;
+   result |= (mask & TGSI_WRITEMASK_Z) ? A0_DEST_CHANNEL_Z : 0;
+   result |= (mask & TGSI_WRITEMASK_W) ? A0_DEST_CHANNEL_W : 0;
+
+   return result;
+}
+
+
+/**
+ * Convert a TGSI_TEXTURE_x token to the matching D0_SAMPLE_TYPE_x token.
+ * Reports a program error and returns 0 for unhandled texture types.
+ */
+static uint
+translate_tex_src_target(struct i915_fp_compile *p, uint tex)
+{
+   switch (tex) {
+   /* 1D, 2D and RECT textures are all sampled as 2D. */
+   case TGSI_TEXTURE_1D:
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+      return D0_SAMPLE_TYPE_2D;
+
+   case TGSI_TEXTURE_3D:
+      return D0_SAMPLE_TYPE_VOLUME;
+
+   case TGSI_TEXTURE_CUBE:
+      return D0_SAMPLE_TYPE_CUBE;
+
+   default:
+      break;
+   }
+
+   i915_program_error(p, "TexSrc type");
+   return 0;
+}
+
+
+/**
+ * Generate a texel lookup instruction.
+ *
+ * The sampler unit is taken from source operand 1 and the coordinate
+ * from source operand 0; the sampler is declared with the sample type
+ * derived from the instruction's texture target.
+ *
+ * \param opcode  the i915 texture opcode (e.g. T0_TEXLD)
+ */
+static void
+emit_tex(struct i915_fp_compile *p,
+         const struct tgsi_full_instruction *inst,
+         uint opcode)
+{
+   const uint target = inst->InstructionExtTexture.Texture;
+   const uint sampler_unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+   const uint sample_type = translate_tex_src_target( p, target );
+   const uint sampler_reg =
+      i915_emit_decl(p, REG_TYPE_S, sampler_unit, sample_type);
+   const uint texcoord = src_vector( p, &inst->FullSrcRegisters[0]);
+   const uint result = get_result_vector( p, &inst->FullDstRegisters[0] );
+   const uint result_flags = get_result_flags( inst );
+
+   i915_emit_texld( p, result, result_flags, sampler_reg, texcoord, opcode);
+}
+
+
+/**
+ * Generate a simple arithmetic instruction
+ * \param opcode  the i915 opcode
+ * \param numArgs  the number of input/src arguments
+ */
+static void
+emit_simple_arith(struct i915_fp_compile *p,
+                  const struct tgsi_full_instruction *inst,
+                  uint opcode, uint numArgs)
+{
+   uint arg1, arg2, arg3;
+
+   assert(numArgs <= 3);
+
+   arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] );
+   arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] );
+   arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->FullSrcRegisters[2] );
+
+   i915_emit_arith( p,
+                    opcode,
+                    get_result_vector( p, &inst->FullDstRegisters[0]),
+                    get_result_flags( inst ), 0,
+                    arg1,
+                    arg2,
+                    arg3 );
+}
+
+
+#define EMIT_1ARG_ARITH( OP ) emit_simple_arith(p, inst, OP, 1)
+#define EMIT_2ARG_ARITH( OP ) emit_simple_arith(p, inst, OP, 2)
+#define EMIT_3ARG_ARITH( OP ) emit_simple_arith(p, inst, OP, 3)
+
+
+
+/**
+ * Translate one TGSI instruction into i915 ALU/texture instructions.
+ *
+ * Simple opcodes map onto a single i915_emit_arith() call.  Others
+ * (COS, SIN, SCS, LIT, LRP, MIN, POW, XPD, ...) are expanded into short
+ * sequences that use utemp scratch registers.  Utemps are released
+ * before returning; the "bad opcode" path returns right after reporting
+ * a program error.
+ */
+static void
+i915_translate_instruction(struct i915_fp_compile *p,
+                           const struct tgsi_full_instruction *inst)
+{
+   uint writemask;
+   uint src0, src1, src2, flags;
+   uint tmp = 0;
+
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_ABS:
+      /* abs(a) = max(a, -a) */
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      i915_emit_arith(p,
+                      A0_MAX,
+                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_flags(inst), 0,
+                      src0, negate(src0, 1, 1, 1, 1), 0);
+      break;
+
+   case TGSI_OPCODE_ADD:
+      EMIT_2ARG_ARITH(A0_ADD);
+      break;
+
+   case TGSI_OPCODE_CMP:
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      src2 = src_vector(p, &inst->FullSrcRegisters[2]);
+      i915_emit_arith(p, A0_CMP,
+                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_flags(inst),
+                      0, src0, src2, src1);   /* NOTE: order of src2, src1 */
+      break;
+
+   case TGSI_OPCODE_COS:
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      tmp = i915_get_utemp(p);
+
+      /* Scale by 1/(2*pi), apply A0_MOD, then scale back by 2*pi
+       * before evaluating the polynomial.
+       */
+      i915_emit_arith(p,
+                      A0_MUL,
+                      tmp, A0_DEST_CHANNEL_X, 0,
+                      src0, i915_emit_const1f(p, 1.0 / (M_PI * 2)), 0);
+
+      i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
+
+      /* By choosing different taylor constants, could get rid of this mul:
+       */
+      i915_emit_arith(p,
+                      A0_MUL,
+                      tmp, A0_DEST_CHANNEL_X, 0,
+                      tmp, i915_emit_const1f(p, (M_PI * 2)), 0);
+
+      /* 
+       * t0.xy = MUL x.xx11, x.x111  ; x^2, x, 1, 1
+       * t0.xyz = MUL t0.xyx1 t0.xx11 ; x^4, x^3, x^2
+       * t0.xyz = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2
+       * result = DP4 t0.1zyx, cos_constants
+       */
+      i915_emit_arith(p,
+                      A0_MUL,
+                      tmp, A0_DEST_CHANNEL_XY, 0,
+                      swizzle(tmp, X, X, ONE, ONE),
+                      swizzle(tmp, X, ONE, ONE, ONE), 0);
+
+      i915_emit_arith(p,
+                      A0_MUL,
+                      tmp, A0_DEST_CHANNEL_XYZ, 0,
+                      swizzle(tmp, X, Y, X, ONE),
+                      swizzle(tmp, X, X, ONE, ONE), 0);
+
+      i915_emit_arith(p,
+                      A0_MUL,
+                      tmp, A0_DEST_CHANNEL_XYZ, 0,
+                      swizzle(tmp, X, X, Z, ONE),
+                      swizzle(tmp, Z, ONE, ONE, ONE), 0);
+
+      i915_emit_arith(p,
+                      A0_DP4,
+                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_flags(inst), 0,
+                      swizzle(tmp, ONE, Z, Y, X),
+                      i915_emit_const4fv(p, cos_constants), 0);
+      break;
+
+   case TGSI_OPCODE_DP3:
+      EMIT_2ARG_ARITH(A0_DP3);
+      break;
+
+   case TGSI_OPCODE_DP4:
+      EMIT_2ARG_ARITH(A0_DP4);
+      break;
+
+   case TGSI_OPCODE_DPH:
+      /* DP4 with src0.w forced to ONE */
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+
+      i915_emit_arith(p,
+                      A0_DP4,
+                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_flags(inst), 0,
+                      swizzle(src0, X, Y, Z, ONE), src1, 0);
+      break;
+
+   case TGSI_OPCODE_DST:
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+
+      /* result[0] = 1    * 1;
+       * result[1] = a[1] * b[1];
+       * result[2] = a[2] * 1;
+       * result[3] = 1    * b[3];
+       */
+      i915_emit_arith(p,
+                      A0_MUL,
+                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_flags(inst), 0,
+                      swizzle(src0, ONE, Y, Z, ONE),
+                      swizzle(src1, ONE, Y, ONE, W), 0);
+      break;
+
+   case TGSI_OPCODE_EX2:
+      /* Scalar op: src0.x is replicated to every channel. */
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+
+      i915_emit_arith(p,
+                      A0_EXP,
+                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_flags(inst), 0,
+                      swizzle(src0, X, X, X, X), 0, 0);
+      break;
+
+   case TGSI_OPCODE_FLR:
+      EMIT_1ARG_ARITH(A0_FLR);
+      break;
+
+   case TGSI_OPCODE_FRC:
+      EMIT_1ARG_ARITH(A0_FRC);
+      break;
+
+   case TGSI_OPCODE_KIL:
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      tmp = i915_get_utemp(p);
+
+      i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL,   /* use a dummy dest reg */
+                      0, src0, T0_TEXKILL);
+      break;
+
+   case TGSI_OPCODE_LG2:
+      /* Scalar op: src0.x is replicated to every channel. */
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+
+      i915_emit_arith(p,
+                      A0_LOG,
+                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_flags(inst), 0,
+                      swizzle(src0, X, X, X, X), 0, 0);
+      break;
+
+   case TGSI_OPCODE_LIT:
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      tmp = i915_get_utemp(p);
+
+      /* tmp = max( a.xyzw, a.00zw )
+       * XXX: Clamp tmp.w to -128..128
+       * tmp.y = log(tmp.y)
+       * tmp.y = tmp.w * tmp.y
+       * tmp.y = exp(tmp.y)
+       * result = cmp (a.11-x1, a.1x01, a.1xy1 )
+       */
+      i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
+                      src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
+
+      i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
+                      swizzle(tmp, Y, Y, Y, Y), 0, 0);
+
+      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
+                      swizzle(tmp, ZERO, Y, ZERO, ZERO),
+                      swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
+
+      i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
+                      swizzle(tmp, Y, Y, Y, Y), 0, 0);
+
+      i915_emit_arith(p, A0_CMP,
+                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_flags(inst), 0,
+                      negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
+                      swizzle(tmp, ONE, X, ZERO, ONE),
+                      swizzle(tmp, ONE, X, Y, ONE));
+
+      break;
+
+   case TGSI_OPCODE_LRP:
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      src2 = src_vector(p, &inst->FullSrcRegisters[2]);
+      flags = get_result_flags(inst);
+      tmp = i915_get_utemp(p);
+
+      /* b*a + c*(1-a)
+       *
+       * b*a + c - ca 
+       *
+       * tmp = b*a + c, 
+       * result = (-c)*a + tmp 
+       */
+      /* The intermediate is written without the saturate bit. */
+      i915_emit_arith(p, A0_MAD, tmp,
+                      flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
+
+      i915_emit_arith(p, A0_MAD,
+                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
+      break;
+
+   case TGSI_OPCODE_MAD:
+      EMIT_3ARG_ARITH(A0_MAD);
+      break;
+
+   case TGSI_OPCODE_MAX:
+      EMIT_2ARG_ARITH(A0_MAX);
+      break;
+
+   case TGSI_OPCODE_MIN:
+      /* min(a, b) = -max(-a, -b) */
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      tmp = i915_get_utemp(p);
+      flags = get_result_flags(inst);
+
+      i915_emit_arith(p,
+                      A0_MAX,
+                      tmp, flags & A0_DEST_CHANNEL_ALL, 0,
+                      negate(src0, 1, 1, 1, 1),
+                      negate(src1, 1, 1, 1, 1), 0);
+
+      i915_emit_arith(p,
+                      A0_MOV,
+                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
+      break;
+
+   case TGSI_OPCODE_MOV:
+      /* aka TGSI_OPCODE_SWZ */
+      EMIT_1ARG_ARITH(A0_MOV);
+      break;
+
+   case TGSI_OPCODE_MUL:
+      EMIT_2ARG_ARITH(A0_MUL);
+      break;
+
+   case TGSI_OPCODE_POW:
+      /* pow(a, b) = exp(b * log(a)), using the x components */
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      tmp = i915_get_utemp(p);
+      flags = get_result_flags(inst);
+
+      /* XXX: masking on intermediate values, here and elsewhere.
+       */
+      i915_emit_arith(p,
+                      A0_LOG,
+                      tmp, A0_DEST_CHANNEL_X, 0,
+                      swizzle(src0, X, X, X, X), 0, 0);
+
+      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
+
+      i915_emit_arith(p,
+                      A0_EXP,
+                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
+      break;
+
+   case TGSI_OPCODE_RCP:
+      /* Scalar op: src0.x is replicated to every channel. */
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+
+      i915_emit_arith(p,
+                      A0_RCP,
+                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_flags(inst), 0,
+                      swizzle(src0, X, X, X, X), 0, 0);
+      break;
+
+   case TGSI_OPCODE_RSQ:
+      /* Scalar op: src0.x is replicated to every channel. */
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+
+      i915_emit_arith(p,
+                      A0_RSQ,
+                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_flags(inst), 0,
+                      swizzle(src0, X, X, X, X), 0, 0);
+      break;
+
+   case TGSI_OPCODE_SCS:
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      tmp = i915_get_utemp(p);
+
+      /* 
+       * t0.xy = MUL x.xx11, x.x111  ; x^2, x, 1, 1
+       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
+       * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
+       * scs.y = DP4 t1.wzyx, sin_constants
+       * t0.xyz = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2
+       * scs.x = DP4 t0.1zyx, cos_constants
+       */
+      i915_emit_arith(p,
+                      A0_MUL,
+                      tmp, A0_DEST_CHANNEL_XY, 0,
+                      swizzle(src0, X, X, ONE, ONE),
+                      swizzle(src0, X, ONE, ONE, ONE), 0);
+
+      i915_emit_arith(p,
+                      A0_MUL,
+                      tmp, A0_DEST_CHANNEL_ALL, 0,
+                      swizzle(tmp, X, Y, X, Y),
+                      swizzle(tmp, X, X, ONE, ONE), 0);
+
+      writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+
+      if (writemask & TGSI_WRITEMASK_Y) {
+         uint tmp1;
+
+         /* Keep tmp intact when the cosine path below still needs it. */
+         if (writemask & TGSI_WRITEMASK_X)
+            tmp1 = i915_get_utemp(p);
+         else
+            tmp1 = tmp;
+
+         /* Second operand is t0.yz11: x^4 * x^3 = x^7 in the first
+          * channel (using t0.x there would produce x^8).
+          */
+         i915_emit_arith(p,
+                         A0_MUL,
+                         tmp1, A0_DEST_CHANNEL_ALL, 0,
+                         swizzle(tmp, X, Y, Y, W),
+                         swizzle(tmp, Y, Z, ONE, ONE), 0);
+
+         i915_emit_arith(p,
+                         A0_DP4,
+                         get_result_vector(p, &inst->FullDstRegisters[0]),
+                         A0_DEST_CHANNEL_Y, 0,
+                         swizzle(tmp1, W, Z, Y, X),
+                         i915_emit_const4fv(p, sin_constants), 0);
+      }
+
+      if (writemask & TGSI_WRITEMASK_X) {
+         i915_emit_arith(p,
+                         A0_MUL,
+                         tmp, A0_DEST_CHANNEL_XYZ, 0,
+                         swizzle(tmp, X, X, Z, ONE),
+                         swizzle(tmp, Z, ONE, ONE, ONE), 0);
+
+         i915_emit_arith(p,
+                         A0_DP4,
+                         get_result_vector(p, &inst->FullDstRegisters[0]),
+                         A0_DEST_CHANNEL_X, 0,
+                         swizzle(tmp, ONE, Z, Y, X),
+                         i915_emit_const4fv(p, cos_constants), 0);
+      }
+      break;
+
+   case TGSI_OPCODE_SGE:
+      EMIT_2ARG_ARITH(A0_SGE);
+      break;
+
+   case TGSI_OPCODE_SIN:
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      tmp = i915_get_utemp(p);
+
+      /* Scale by 1/(2*pi), apply A0_MOD, then scale back by 2*pi
+       * before evaluating the polynomial.
+       */
+      i915_emit_arith(p,
+                      A0_MUL,
+                      tmp, A0_DEST_CHANNEL_X, 0,
+                      src0, i915_emit_const1f(p, 1.0 / (M_PI * 2)), 0);
+
+      i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
+
+      /* By choosing different taylor constants, could get rid of this mul:
+       */
+      i915_emit_arith(p,
+                      A0_MUL,
+                      tmp, A0_DEST_CHANNEL_X, 0,
+                      tmp, i915_emit_const1f(p, (M_PI * 2)), 0);
+
+      /* 
+       * t0.xy = MUL x.xx11, x.x111  ; x^2, x, 1, 1
+       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
+       * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
+       * result = DP4 t1.wzyx, sin_constants
+       */
+      i915_emit_arith(p,
+                      A0_MUL,
+                      tmp, A0_DEST_CHANNEL_XY, 0,
+                      swizzle(tmp, X, X, ONE, ONE),
+                      swizzle(tmp, X, ONE, ONE, ONE), 0);
+
+      i915_emit_arith(p,
+                      A0_MUL,
+                      tmp, A0_DEST_CHANNEL_ALL, 0,
+                      swizzle(tmp, X, Y, X, Y),
+                      swizzle(tmp, X, X, ONE, ONE), 0);
+
+      /* Second operand is t0.yz11: x^4 * x^3 = x^7 in the first
+       * channel (using t0.x there would produce x^8).
+       */
+      i915_emit_arith(p,
+                      A0_MUL,
+                      tmp, A0_DEST_CHANNEL_ALL, 0,
+                      swizzle(tmp, X, Y, Y, W),
+                      swizzle(tmp, Y, Z, ONE, ONE), 0);
+
+      i915_emit_arith(p,
+                      A0_DP4,
+                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_flags(inst), 0,
+                      swizzle(tmp, W, Z, Y, X),
+                      i915_emit_const4fv(p, sin_constants), 0);
+      break;
+
+   case TGSI_OPCODE_SLT:
+      EMIT_2ARG_ARITH(A0_SLT);
+      break;
+
+   case TGSI_OPCODE_SUB:
+      /* a - b = a + (-b) */
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+
+      i915_emit_arith(p,
+                      A0_ADD,
+                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_flags(inst), 0,
+                      src0, negate(src1, 1, 1, 1, 1), 0);
+      break;
+
+   case TGSI_OPCODE_TEX:
+      emit_tex(p, inst, T0_TEXLD);
+      break;
+
+   case TGSI_OPCODE_TXB:
+      emit_tex(p, inst, T0_TEXLDB);
+      break;
+
+   case TGSI_OPCODE_TXP:
+      emit_tex(p, inst, T0_TEXLDP);
+      break;
+
+   case TGSI_OPCODE_XPD:
+      /* Cross product:
+       *      result.x = src0.y * src1.z - src0.z * src1.y;
+       *      result.y = src0.z * src1.x - src0.x * src1.z;
+       *      result.z = src0.x * src1.y - src0.y * src1.x;
+       *      result.w = undef;
+       */
+      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      tmp = i915_get_utemp(p);
+
+      i915_emit_arith(p,
+                      A0_MUL,
+                      tmp, A0_DEST_CHANNEL_ALL, 0,
+                      swizzle(src0, Z, X, Y, ONE),
+                      swizzle(src1, Y, Z, X, ONE), 0);
+
+      i915_emit_arith(p,
+                      A0_MAD,
+                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_flags(inst), 0,
+                      swizzle(src0, Y, Z, X, ONE),
+                      swizzle(src1, Z, X, Y, ONE),
+                      negate(tmp, 1, 1, 1, 0));
+      break;
+
+   default:
+      i915_program_error(p, "bad opcode");
+      return;
+   }
+
+   i915_release_utemps(p);
+}
+
+
+/**
+ * Translate a TGSI fragment shader into i915 hardware instructions.
+ *
+ * Walks the token stream and hands every instruction token to
+ * i915_translate_instruction().  Declaration, immediate and unknown
+ * tokens are not handled yet and trip an assertion.
+ *
+ * Possible concerns:
+ *
+ * SIN, COS -- could use another taylor step?
+ * LIT      -- results seem a little different to sw mesa
+ * LOG      -- different to mesa on negative numbers, but this is conformant.
+ * 
+ * Parse failures -- Mesa doesn't currently give a good indication
+ * internally whether a particular program string parsed or not.  This
+ * can lead to confusion -- hopefully we cope with it ok now.
+ */
+void
+i915_translate_program(struct i915_fp_compile *p,
+                       const struct tgsi_token *tokens)
+{
+   struct tgsi_parse_context ctx;
+
+   tgsi_parse_init( &ctx, tokens );
+
+   for (;;) {
+      if (tgsi_parse_end_of_tokens( &ctx ))
+         break;
+
+      tgsi_parse_token( &ctx );
+
+      if (ctx.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
+         i915_translate_instruction(p, &ctx.FullToken.FullInstruction);
+      }
+      else {
+         /* TGSI_TOKEN_TYPE_DECLARATION, TGSI_TOKEN_TYPE_IMMEDIATE and
+          * anything unrecognised: not supported yet.
+          */
+         assert(0);
+      }
+   }
+
+   tgsi_parse_free (&ctx);
+}
+
+
+
+
+/* Rather than trying to intercept and jiggle depth writes during
+ * emit, just move the value into its correct position at the end of
+ * the program: when the shader writes depth, copy the depth output's
+ * Z component into its W channel.
+ */
+void
+i915_fixup_depth_write(struct i915_fp_compile *p)
+{
+   uint depth_reg;
+
+   /* Nothing to do unless the shader writes the depth result. */
+   if (!(p->shader->outputs_written & (1<<FRAG_RESULT_DEPR)))
+      return;
+
+   depth_reg = UREG(REG_TYPE_OD, 0);
+
+   i915_emit_arith(p,
+                   A0_MOV,
+                   depth_reg, A0_DEST_CHANNEL_W, 0,
+                   swizzle(depth_reg, X, Y, Z, Z), 0, 0);
+}
+
+
+
+