gallivm: Add a new interface for doing TGSI->LLVM conversions
author Tom Stellard <thomas.stellard@amd.com>
Fri, 12 Aug 2011 16:42:41 +0000 (12:42 -0400)
committer Tom Stellard <thomas.stellard@amd.com>
Mon, 30 Jan 2012 18:37:01 +0000 (13:37 -0500)
lp_bld_tgsi_soa.c has been adapted to use this new interface, but
lp_bld_tgsi_aos.c has only been partially adapted, since nothing in
gallium currently uses it.

v2:
- Rename lp_bld_tgsi_action.[ch] => lp_bld_tgsi_action.[ch]
- Initialize tgsi_info in lp_bld_tgsi_aos.c
- Fix copyright dates

src/gallium/auxiliary/Makefile.sources
src/gallium/auxiliary/gallivm/lp_bld_tgsi.c [new file with mode: 0644]
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c [new file with mode: 0644]
src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h [new file with mode: 0644]
src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c

index f55a4eb..740e301 100644 (file)
@@ -176,6 +176,8 @@ GALLIVM_SOURCES := \
         gallivm/lp_bld_sample_soa.c \
         gallivm/lp_bld_struct.c \
         gallivm/lp_bld_swizzle.c \
+        gallivm/lp_bld_tgsi.c \
+        gallivm/lp_bld_tgsi_action.c \
         gallivm/lp_bld_tgsi_aos.c \
         gallivm/lp_bld_tgsi_info.c \
         gallivm/lp_bld_tgsi_soa.c \
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
new file mode 100644 (file)
index 0000000..261301c
--- /dev/null
@@ -0,0 +1,409 @@
+/**************************************************************************
+ *
+ * Copyright 2011-2012 Advanced Micro Devices, Inc.
+ * Copyright 2010 VMware, Inc.
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "gallivm/lp_bld_tgsi.h"
+
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_gather.h"
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_intr.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "util/u_memory.h"
+
+/**
+ * Allocate the instruction list stored in bld_base.
+ *
+ * Room for LP_MAX_INSTRUCTIONS entries is reserved up front and the list
+ * starts out empty; lp_bld_tgsi_add_instruction() grows it on demand.
+ *
+ * lp_build_tgsi_llvm() releases the array with FREE() itself.  Code that
+ * calls this function directly owns bld_base->instructions and is
+ * responsible for freeing it.
+ *
+ * \return 1 on success, 0 if the allocation failed.
+ */
+unsigned lp_bld_tgsi_list_init(struct lp_build_tgsi_context * bld_base)
+{
+   /* Start from a well-defined empty state even if the caller did not zero
+    * bld_base or is reusing it for another shader. */
+   bld_base->num_instructions = 0;
+   bld_base->max_instructions = 0;
+
+   bld_base->instructions = (struct tgsi_full_instruction *)
+         MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
+   if (!bld_base->instructions) {
+      return 0;
+   }
+   bld_base->max_instructions = LP_MAX_INSTRUCTIONS;
+   return 1;
+}
+
+
+/**
+ * Append a copy of inst_to_add to the instruction list in bld_base.
+ *
+ * When the list is full its capacity is first extended by
+ * LP_MAX_INSTRUCTIONS entries.
+ *
+ * \return 1 on success, 0 if the list could not be grown; nothing is
+ *         appended in that case.
+ */
+unsigned lp_bld_tgsi_add_instruction(
+   struct lp_build_tgsi_context * bld_base,
+   struct tgsi_full_instruction *inst_to_add)
+{
+   if (bld_base->num_instructions == bld_base->max_instructions) {
+      const size_t entry_size = sizeof(struct tgsi_full_instruction);
+      const size_t old_size = bld_base->max_instructions * entry_size;
+      const size_t new_size =
+         (bld_base->max_instructions + LP_MAX_INSTRUCTIONS) * entry_size;
+      struct tgsi_full_instruction *grown =
+         REALLOC(bld_base->instructions, old_size, new_size);
+
+      if (!grown) {
+         return 0;
+      }
+      bld_base->instructions = grown;
+      bld_base->max_instructions += LP_MAX_INSTRUCTIONS;
+   }
+
+   /* Store the new instruction in the first free slot. */
+   memcpy(&bld_base->instructions[bld_base->num_instructions], inst_to_add,
+          sizeof(bld_base->instructions[0]));
+   bld_base->num_instructions++;
+
+   return 1;
+}
+
+
+/**
+ * Derive emit_data->dst_type from the arguments already stored in
+ * emit_data, so all args must have been set before calling this.
+ *
+ * Without arguments the destination type is void; otherwise it is the LLVM
+ * type of the first argument.  tgsi_opcode is not used at present.
+ *
+ * XXX: Not all opcodes have the same src and dst types.
+ */
+static void
+lp_build_action_set_dst_type(
+   struct lp_build_emit_data * emit_data,
+   struct lp_build_tgsi_context *bld_base,
+   unsigned tgsi_opcode)
+{
+   if (emit_data->arg_count != 0) {
+      emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
+      return;
+   }
+
+   emit_data->dst_type =
+      LLVMVoidTypeInContext(bld_base->base.gallivm->context);
+}
+
+/**
+ * Emit callback that lowers an opcode to a call of the intrinsic named by
+ * action->intr_name.  The call receives the arguments collected in
+ * emit_data and its result is stored in emit_data->output[emit_data->chan].
+ */
+void
+lp_build_tgsi_intrinsic(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   LLVMValueRef call;
+
+   call = lp_build_intrinsic(builder, action->intr_name, emit_data->dst_type,
+                             emit_data->args, emit_data->arg_count);
+   emit_data->output[emit_data->chan] = call;
+}
+
+/**
+ * Run the emit callback registered for tgsi_opcode on arguments the caller
+ * has already placed in emit_data (args and arg_count).
+ *
+ * The destination type is derived from those arguments and the channel is
+ * forced to 0, so the result is taken from emit_data->output[0].
+ */
+LLVMValueRef
+lp_build_emit_llvm(
+   struct lp_build_tgsi_context *bld_base,
+   unsigned tgsi_opcode,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_action * action;
+
+   /* XXX: Assert that this is a componentwise or replicate instruction */
+   action = &bld_base->op_actions[tgsi_opcode];
+
+   emit_data->chan = 0;
+   lp_build_action_set_dst_type(emit_data, bld_base, tgsi_opcode);
+
+   assert(action->emit);
+   action->emit(action, bld_base, emit_data);
+
+   return emit_data->output[0];
+}
+
+/**
+ * Emit tgsi_opcode for one argument that has already been fetched.
+ *
+ * \return the value the opcode's emit callback stored in output[0].
+ */
+LLVMValueRef
+lp_build_emit_llvm_unary(
+   struct lp_build_tgsi_context *bld_base,
+   unsigned tgsi_opcode,
+   LLVMValueRef arg0)
+{
+   struct lp_build_emit_data emit_data;
+
+   /* Clear every field (inst, info, unused args and outputs) so the emit
+    * callback never sees indeterminate stack contents. */
+   memset(&emit_data, 0, sizeof(emit_data));
+   emit_data.arg_count = 1;
+   emit_data.args[0] = arg0;
+   return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data);
+}
+
+/**
+ * Emit tgsi_opcode for two arguments that have already been fetched.
+ *
+ * \return the value the opcode's emit callback stored in output[0].
+ */
+LLVMValueRef
+lp_build_emit_llvm_binary(
+   struct lp_build_tgsi_context *bld_base,
+   unsigned tgsi_opcode,
+   LLVMValueRef arg0,
+   LLVMValueRef arg1)
+{
+   struct lp_build_emit_data emit_data;
+
+   /* Clear every field (inst, info, unused args and outputs) so the emit
+    * callback never sees indeterminate stack contents. */
+   memset(&emit_data, 0, sizeof(emit_data));
+   emit_data.arg_count = 2;
+   emit_data.args[0] = arg0;
+   emit_data.args[1] = arg1;
+   return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data);
+}
+
+/**
+ * Emit tgsi_opcode for three arguments that have already been fetched.
+ *
+ * \return the value the opcode's emit callback stored in output[0].
+ */
+LLVMValueRef
+lp_build_emit_llvm_ternary(
+   struct lp_build_tgsi_context *bld_base,
+   unsigned tgsi_opcode,
+   LLVMValueRef arg0,
+   LLVMValueRef arg1,
+   LLVMValueRef arg2)
+{
+   struct lp_build_emit_data emit_data;
+
+   /* Clear every field (inst, info, unused args and outputs) so the emit
+    * callback never sees indeterminate stack contents. */
+   memset(&emit_data, 0, sizeof(emit_data));
+   emit_data.arg_count = 3;
+   emit_data.args[0] = arg0;
+   emit_data.args[1] = arg1;
+   emit_data.args[2] = arg2;
+   return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data);
+}
+
+/**
+ * The default fetch implementation: read channel emit_data->chan of every
+ * source operand of the instruction into emit_data->args, record the
+ * argument count and derive the destination type from the fetched values.
+ */
+void lp_build_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   const unsigned num_src = emit_data->info->num_src;
+   unsigned i;
+
+   for (i = 0; i < num_src; i++) {
+      emit_data->args[i] = lp_build_emit_fetch(bld_base, emit_data->inst, i,
+                                               emit_data->chan);
+   }
+   emit_data->arg_count = num_src;
+
+   lp_build_action_set_dst_type(emit_data, bld_base,
+                                emit_data->inst->Instruction.Opcode);
+}
+
+/* Translate a single TGSI instruction to LLVM IR.
+ *
+ * The action registered in bld_base->op_actions for the instruction's
+ * opcode fetches the arguments and emits the IR; for opcodes with a
+ * destination the results are then handed to bld_base->emit_store().
+ *
+ * bld_base->pc is incremented before anything else, including on the
+ * failure paths.
+ *
+ * It should be assumed that this function ignores writemasks
+ *
+ * Returns FALSE if the opcode is in the deprecated list below or has no
+ * emit callback, TRUE otherwise.
+ */
+boolean
+lp_build_tgsi_inst_llvm(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_instruction * inst)
+{
+   unsigned tgsi_opcode = inst->Instruction.Opcode;
+   const struct tgsi_opcode_info * info = tgsi_get_opcode_info(tgsi_opcode);
+   const struct lp_build_tgsi_action * action =
+                                         &bld_base->op_actions[tgsi_opcode];
+   struct lp_build_emit_data emit_data;
+   unsigned chan_index;
+   LLVMValueRef val;
+
+   bld_base->pc++;
+
+   /* Ignore deprecated instructions: these opcodes are rejected up front
+    * (assert in debug builds, FALSE otherwise) whether or not an action is
+    * registered for them. */
+   switch (inst->Instruction.Opcode) {
+
+   case TGSI_OPCODE_RCC:
+   case TGSI_OPCODE_UP2H:
+   case TGSI_OPCODE_UP2US:
+   case TGSI_OPCODE_UP4B:
+   case TGSI_OPCODE_UP4UB:
+   case TGSI_OPCODE_X2D:
+   case TGSI_OPCODE_ARA:
+   case TGSI_OPCODE_BRA:
+   case TGSI_OPCODE_DIV:
+   case TGSI_OPCODE_PUSHA:
+   case TGSI_OPCODE_POPA:
+   case TGSI_OPCODE_I2F:
+   case TGSI_OPCODE_NOT:
+   case TGSI_OPCODE_SHL:
+   case TGSI_OPCODE_ISHR:
+   case TGSI_OPCODE_AND:
+   case TGSI_OPCODE_OR:
+   case TGSI_OPCODE_MOD:
+   case TGSI_OPCODE_XOR:
+   case TGSI_OPCODE_SAD:
+   case TGSI_OPCODE_TXF:
+   case TGSI_OPCODE_TXQ:
+      /* deprecated? */
+      assert(0);
+      return FALSE;
+      break;
+   }
+
+   /* Check if the opcode has been implemented */
+   if (!action->emit) {
+      return FALSE;
+   }
+
+   memset(&emit_data, 0, sizeof(emit_data));
+
+   assert(info->num_dst <= 1);
+   if (info->num_dst) {
+      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+         emit_data.output[chan_index] = bld_base->base.undef;
+      }
+   }
+
+   emit_data.inst = inst;
+   emit_data.info = info;
+
+   /* Emit the instructions.  In SoA mode a componentwise opcode is fetched
+    * and emitted once per enabled destination channel; the default fetch is
+    * used when the action does not provide its own. */
+   if (info->output_mode == TGSI_OUTPUT_COMPONENTWISE && bld_base->soa) {
+      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
+         emit_data.chan = chan_index;
+         if (!action->fetch_args) {
+            lp_build_fetch_args(bld_base, &emit_data);
+         } else {
+             action->fetch_args(bld_base, &emit_data);
+         }
+         action->emit(action, bld_base, &emit_data);
+      }
+   } else {
+      emit_data.chan = LP_CHAN_ALL; /* fetch sees LP_CHAN_ALL; note that no
+                                     * default fetch is done on this path */
+      if (action->fetch_args) {
+         action->fetch_args(bld_base, &emit_data);
+      }
+      /* Make sure the output value is stored in emit_data.output[0], unless
+       * the opcode is channel dependent */
+      if (info->output_mode != TGSI_OUTPUT_CHAN_DEPENDENT) {
+         emit_data.chan = 0;
+      }
+      action->emit(action, bld_base, &emit_data);
+
+      /* Replicate the output values: in SoA mode the single result in
+       * output[0] is copied to every enabled destination channel and the
+       * other channels are cleared. */
+      if (info->output_mode == TGSI_OUTPUT_REPLICATE && bld_base->soa) {
+         val = emit_data.output[0];
+         memset(emit_data.output, 0, sizeof(emit_data.output));
+         TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
+            emit_data.output[chan_index] = val;
+         }
+      }
+   }
+
+   if (info->num_dst > 0) {
+      bld_base->emit_store(bld_base, inst, info, emit_data.output);
+   }
+   return TRUE;
+}
+
+
+/**
+ * Fetch source operand src_op of inst.
+ *
+ * chan_index selects one channel, which is mapped through the register's
+ * swizzle, or is LP_CHAN_ALL to fetch all channels at once; in that case the
+ * register swizzle is applied to the fetched value with
+ * bld_base->emit_swizzle().  The register's absolute-value and negate
+ * modifiers are applied to the fetched value.
+ *
+ * \return the fetched value, or bld_base->base.undef for an invalid swizzle
+ *         or a register file without a fetch callback.
+ */
+LLVMValueRef
+lp_build_emit_fetch(
+   struct lp_build_tgsi_context *bld_base,
+   const struct tgsi_full_instruction *inst,
+   unsigned src_op,
+   const unsigned chan_index)
+{
+   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
+   const boolean fetch_all = (chan_index == LP_CHAN_ALL);
+   lp_build_emit_fetch_fn fetch;
+   LLVMValueRef res;
+   unsigned swizzle = ~0;
+
+   if (!fetch_all) {
+      swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index);
+      if (swizzle > 3) {
+         assert(0 && "invalid swizzle in emit_fetch()");
+         return bld_base->base.undef;
+      }
+   }
+
+   assert(reg->Register.Index <= bld_base->info->file_max[reg->Register.File]);
+
+   fetch = bld_base->emit_fetch_funcs[reg->Register.File];
+   if (!fetch) {
+      assert(0 && "invalid src register in emit_fetch()");
+      return bld_base->base.undef;
+   }
+   res = fetch(bld_base, reg, swizzle);
+
+   /* Source modifiers: absolute value first, then negation. */
+   if (reg->Register.Absolute) {
+      res = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS, res);
+   }
+   if (reg->Register.Negate) {
+      res = lp_build_negate( &bld_base->base, res );
+   }
+
+   /* Swizzle the argument when all channels were fetched together. */
+   if (fetch_all) {
+      res = bld_base->emit_swizzle(bld_base, res,
+                                   reg->Register.SwizzleX,
+                                   reg->Register.SwizzleY,
+                                   reg->Register.SwizzleZ,
+                                   reg->Register.SwizzleW);
+   }
+
+   return res;
+}
+
+/**
+ * Translate a complete TGSI token stream to LLVM IR.
+ *
+ * The optional prologue callback runs first.  The tokens are then parsed:
+ * declarations and immediates go straight to the emit_declaration and
+ * emit_immediate callbacks, while instructions are collected in
+ * bld_base->instructions.  The collected instructions are translated
+ * starting at bld_base->pc until pc becomes -1, and finally the optional
+ * epilogue callback runs.
+ *
+ * The instruction list and the parser are released on every path once the
+ * list has been allocated.
+ *
+ * \return TRUE on success.  FALSE if the instruction list could not be
+ *         allocated or grown, or if an instruction could not be translated;
+ *         the epilogue is not run in those cases.
+ */
+boolean
+lp_build_tgsi_llvm(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_token *tokens)
+{
+   struct tgsi_parse_context parse;
+   boolean ok = TRUE;
+
+   if (bld_base->emit_prologue) {
+      bld_base->emit_prologue(bld_base);
+   }
+
+   if (!lp_bld_tgsi_list_init(bld_base)) {
+      return FALSE;
+   }
+
+   tgsi_parse_init( &parse, tokens );
+
+   while( ok && !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         /* Inputs already interpolated */
+         bld_base->emit_declaration(bld_base, &parse.FullToken.FullDeclaration);
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         /* A dropped instruction would silently miscompile the shader, so
+          * an allocation failure here aborts the translation. */
+         if (!lp_bld_tgsi_add_instruction(bld_base,
+                                          &parse.FullToken.FullInstruction)) {
+            ok = FALSE;
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         bld_base->emit_immediate(bld_base, &parse.FullToken.FullImmediate);
+         break;
+
+      case TGSI_TOKEN_TYPE_PROPERTY:
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+
+   while (ok && bld_base->pc != -1) {
+      struct tgsi_full_instruction *instr = bld_base->instructions +
+                                                       bld_base->pc;
+      /* Looked up before translating, because translation advances pc. */
+      const struct tgsi_opcode_info *opcode_info =
+         tgsi_get_opcode_info(instr->Instruction.Opcode);
+      if (!lp_build_tgsi_inst_llvm(bld_base, instr)) {
+         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
+                       opcode_info->mnemonic);
+         ok = FALSE;
+      }
+   }
+
+   /* Common cleanup for the success and failure paths. */
+   tgsi_parse_free(&parse);
+
+   FREE(bld_base->instructions);
+   bld_base->instructions = NULL;
+
+   if (!ok) {
+      return FALSE;
+   }
+
+   if (bld_base->emit_epilogue) {
+      bld_base->emit_epilogue(bld_base);
+   }
+
+   return TRUE;
+}
index 9713d10..b03eefc 100644 (file)
@@ -1,5 +1,6 @@
 /**************************************************************************
  *
+ * Copyright 2011-2012 Advanced Micro Devices, Inc.
  * Copyright 2009 VMware, Inc.
  * All Rights Reserved.
  *
  * TGSI to LLVM IR translation.
  *
  * @author Jose Fonseca <jfonseca@vmware.com>
+ * @author Tom Stellard <thomas.stellard@amd.com>
  */
 
 #ifndef LP_BLD_TGSI_H
 #define LP_BLD_TGSI_H
 
 #include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_tgsi_action.h"
+#include "gallivm/lp_bld_limits.h"
+#include "lp_bld_type.h"
 #include "pipe/p_compiler.h"
 #include "pipe/p_state.h"
+#include "tgsi/tgsi_exec.h"
 #include "tgsi/tgsi_scan.h"
 
 
+/* Pseudo channel index meaning "all channels at once"; passed in place of a
+ * channel number.  Parenthesized so it expands safely inside any
+ * expression. */
+#define LP_CHAN_ALL (~0)
+
+/* Initial capacity of the instruction list, and the number of entries it
+ * grows by each time it fills up. */
+#define LP_MAX_INSTRUCTIONS 256
+
+struct tgsi_full_declaration;
+struct tgsi_full_immediate;
+struct tgsi_full_instruction;
+struct tgsi_full_src_register;
+struct tgsi_opcode_info;
 struct tgsi_token;
 struct tgsi_shader_info;
-struct lp_type;
-struct lp_build_context;
 struct lp_build_mask_context;
 struct gallivm_state;
 
@@ -207,4 +220,328 @@ lp_build_system_values_array(struct gallivm_state *gallivm,
                              LLVMValueRef facing);
 
 
+struct lp_exec_mask {   /* Execution-mask bookkeeping: current mask values
+                         * plus three stacks (conditions, loops, calls),
+                         * each LP_MAX_TGSI_NESTING entries deep with its
+                         * own *_size counter. */
+   struct lp_build_context *bld;
+
+   boolean has_mask;
+
+   LLVMTypeRef int_vec_type;
+   /* Stack of condition values and the current condition mask. */
+   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
+   int cond_stack_size;
+   LLVMValueRef cond_mask;
+   /* Current loop state; loop_stack entries hold the same four fields.
+    * NOTE(review): presumably saved/restored on loop entry/exit -- confirm
+    * against the code that uses this struct. */
+   LLVMBasicBlockRef loop_block;
+   LLVMValueRef cont_mask;
+   LLVMValueRef break_mask;
+   LLVMValueRef break_var;
+   struct {
+      LLVMBasicBlockRef loop_block;
+      LLVMValueRef cont_mask;
+      LLVMValueRef break_mask;
+      LLVMValueRef break_var;
+   } loop_stack[LP_MAX_TGSI_NESTING];
+   int loop_stack_size;
+   /* Current return mask; each call_stack entry records a pc together with
+    * a return mask. */
+   LLVMValueRef ret_mask;
+   struct {
+      int pc;
+      LLVMValueRef ret_mask;
+   } call_stack[LP_MAX_TGSI_NESTING];
+   int call_stack_size;
+
+   LLVMValueRef exec_mask;
+};
+
+struct lp_build_tgsi_inst_list   /* Array of TGSI instructions with separate
+                                  * capacity and element counts.
+                                  * NOTE(review): struct lp_build_tgsi_context
+                                  * carries identically named fields itself;
+                                  * check whether this type is still used. */
+{
+   struct tgsi_full_instruction *instructions;
+   uint max_instructions;   /* number of entries allocated */
+   uint num_instructions;   /* number of entries in use */
+};
+
+/* Declared ahead of the prototypes below so that their parameters refer to
+ * the file-scope struct even if no earlier header has declared it. */
+struct lp_build_tgsi_context;
+
+
+/* Allocate the instruction list of bld_base; returns 1 on success and 0 if
+ * the allocation failed. */
+unsigned lp_bld_tgsi_list_init(struct lp_build_tgsi_context * bld_base);
+
+
+/* Append a copy of inst_to_add to the instruction list of bld_base, growing
+ * the list when it is full; returns 1 on success and 0 on allocation
+ * failure. */
+unsigned lp_bld_tgsi_add_instruction(
+   struct lp_build_tgsi_context * bld_base,
+   struct tgsi_full_instruction *inst_to_add);
+
+
+typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct lp_build_tgsi_context *,
+                                        const struct tgsi_full_src_register *,
+                                        unsigned); /* Fetch callback for one
+                                                    * register file: gets the
+                                                    * context, the source
+                                                    * register and the swizzle
+                                                    * picked by
+                                                    * lp_build_emit_fetch()
+                                                    * (0..3, or ~0 when all
+                                                    * channels are requested)
+                                                    * and returns the value. */
+
+struct lp_build_tgsi_context   /* State and callbacks shared by the
+                                * TGSI -> LLVM translation helpers. */
+{
+   struct lp_build_context base;
+
+   /** This array stores functions that are used to transform TGSI opcodes to
+     * LLVM instructions.  It is indexed by TGSI opcode.
+     */
+   struct lp_build_tgsi_action op_actions[TGSI_OPCODE_LAST];
+
+   /* TGSI_OPCODE_RSQ is defined as 1 / sqrt( abs(src0.x) ), rsq_action
+    * should compute 1 / sqrt (src0.x) */
+   struct lp_build_tgsi_action rsq_action;
+   /* Shader info; lp_build_emit_fetch() checks register indices against
+    * info->file_max. */
+   const struct tgsi_shader_info *info;
+   /* Fetch callbacks indexed by register file.  lp_build_emit_fetch()
+    * treats a NULL entry as an invalid source register. */
+   lp_build_emit_fetch_fn emit_fetch_funcs[TGSI_FILE_COUNT];
+   /* Applies the X/Y/Z/W swizzle of a source register to a value that was
+    * fetched with all channels at once. */
+   LLVMValueRef (*emit_swizzle)(struct lp_build_tgsi_context *,
+                         LLVMValueRef, unsigned, unsigned, unsigned, unsigned);
+   /* Stores the emitted outputs of an instruction that has a destination. */
+   void (*emit_store)(struct lp_build_tgsi_context *,
+                      const struct tgsi_full_instruction *,
+                      const struct tgsi_opcode_info *,
+                      LLVMValueRef dst[4]);
+   /* Called by lp_build_tgsi_llvm() for every declaration token; it is
+    * called without a NULL check. */
+   void (*emit_declaration)(struct lp_build_tgsi_context *,
+                             const struct tgsi_full_declaration *decl);
+   /* Called by lp_build_tgsi_llvm() for every immediate token; it is called
+    * without a NULL check. */
+   void (*emit_immediate)(struct lp_build_tgsi_context *,
+                          const struct tgsi_full_immediate *imm);
+
+
+   /* Allow the user to store data in this structure rather than passing it
+    * to every function. */
+   void * userdata;
+   /* When set, componentwise opcodes are emitted once per enabled channel
+    * and replicated results are copied to every enabled channel. */
+   boolean soa;
+   /* Index into instructions[] of the next instruction to translate;
+    * lp_build_tgsi_llvm() stops translating when it is -1. */
+   int pc;
+   /* Instruction list filled by lp_bld_tgsi_add_instruction(). */
+   struct tgsi_full_instruction *instructions;
+   uint max_instructions;
+   uint num_instructions;
+
+   /** This function allows the user to insert some instructions at the
+     * beginning of the program.  It is optional and does not need to be
+     * implemented.
+     */
+   void (*emit_prologue)(struct lp_build_tgsi_context*);
+
+   /** This function allows the user to insert some instructions at the end of
+     * the program.  This callback is intended to be used for emitting
+     * instructions to handle the export for the output registers, but it can
+     * be used for any purpose.  Implementing this function is optional, but
+     * recommended.
+     */
+   void (*emit_epilogue)(struct lp_build_tgsi_context*);
+};
+
+struct lp_build_tgsi_soa_context   /* SoA translation context; extends
+                                    * struct lp_build_tgsi_context. */
+{
+   struct lp_build_tgsi_context bld_base; /* must stay the first member:
+                                           * lp_soa_context() casts a pointer
+                                           * to it back to this struct */
+
+   /* Builder for vector integer masks and indices */
+   struct lp_build_context uint_bld;
+
+   /* Builder for scalar elements of shader's data type (float) */
+   struct lp_build_context elem_bld;
+
+   LLVMValueRef consts_ptr;
+   const LLVMValueRef *pos;
+   const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS];
+   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS];
+
+   const struct lp_build_sampler_soa *sampler;
+   /* Per-register, per-channel values for each register file. */
+   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][TGSI_NUM_CHANNELS];
+   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][TGSI_NUM_CHANNELS];
+   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][TGSI_NUM_CHANNELS];
+   LLVMValueRef preds[LP_MAX_TGSI_PREDS][TGSI_NUM_CHANNELS];
+
+   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
+    * set in the indirect_files field.
+    * The temps[] array above is unused then.
+    */
+   LLVMValueRef temps_array;
+
+   /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
+    * set in the indirect_files field.
+    * The outputs[] array above is unused then.
+    */
+   LLVMValueRef outputs_array;
+
+   /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
+    * set in the indirect_files field.
+    * The inputs[] array above is unused then.
+    */
+   LLVMValueRef inputs_array;
+
+   LLVMValueRef system_values_array;
+
+   /** bitmask indicating which register files are accessed indirectly */
+   unsigned indirect_files;
+
+   struct lp_build_mask_context *mask;
+   struct lp_exec_mask exec_mask;
+
+   uint num_immediates;
+
+};
+
+void
+lp_emit_declaration_soa(
+   struct lp_build_tgsi_context *bld,
+   const struct tgsi_full_declaration *decl);
+
+void lp_emit_immediate_soa(
+   struct lp_build_tgsi_context *bld_base,
+   const struct tgsi_full_immediate *imm);
+
+boolean
+lp_emit_instruction_soa(
+   struct lp_build_tgsi_soa_context *bld,
+   const struct tgsi_full_instruction *inst,
+   const struct tgsi_opcode_info *info);
+
+
+LLVMValueRef
+lp_get_temp_ptr_soa(
+   struct lp_build_tgsi_soa_context *bld,
+   unsigned index,
+   unsigned chan);
+
+LLVMValueRef
+lp_get_output_ptr(
+   struct lp_build_tgsi_soa_context *bld,
+   unsigned index,
+   unsigned chan);
+
+struct lp_build_tgsi_aos_context   /* AoS translation context; extends
+                                    * struct lp_build_tgsi_context. */
+{
+   struct lp_build_tgsi_context bld_base; /* must stay the first member:
+                                           * lp_aos_context() casts a pointer
+                                           * to it back to this struct */
+
+   /* Builder for integer masks and indices */
+   struct lp_build_context int_bld;
+
+   /*
+    * AoS swizzle used:
+    * - swizzles[0] = red index
+    * - swizzles[1] = green index
+    * - swizzles[2] = blue index
+    * - swizzles[3] = alpha index
+    */
+   unsigned char swizzles[4];
+   unsigned char inv_swizzles[4]; /* NOTE(review): presumably the inverse
+                                   * mapping of swizzles[] -- confirm */
+
+   LLVMValueRef consts_ptr;
+   const LLVMValueRef *inputs;
+   LLVMValueRef *outputs;
+
+   struct lp_build_sampler_aos *sampler;
+   /* One value per register for each register file. */
+   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
+   LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
+   LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
+   LLVMValueRef preds[LP_MAX_TGSI_PREDS];
+
+   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
+    * set in the indirect_files field.
+    * The temps[] array above is unused then.
+    */
+   LLVMValueRef temps_array;
+
+   /** bitmask indicating which register files are accessed indirectly */
+   unsigned indirect_files;
+
+};
+
+/** Cast a generic context pointer to the SoA context whose first member it
+ * is. */
+static INLINE struct lp_build_tgsi_soa_context *
+lp_soa_context(struct lp_build_tgsi_context *bld_base)
+{
+   struct lp_build_tgsi_soa_context *soa_bld =
+      (struct lp_build_tgsi_soa_context *)bld_base;
+
+   return soa_bld;
+}
+
+/** Cast a generic context pointer to the AoS context whose first member it
+ * is. */
+static INLINE struct lp_build_tgsi_aos_context *
+lp_aos_context(struct lp_build_tgsi_context *bld_base)
+{
+   struct lp_build_tgsi_aos_context *aos_bld =
+      (struct lp_build_tgsi_aos_context *)bld_base;
+
+   return aos_bld;
+}
+
+void
+lp_emit_declaration_aos(
+   struct lp_build_tgsi_aos_context *bld,
+   const struct tgsi_full_declaration *decl);
+
+
+boolean
+lp_emit_instruction_aos(
+   struct lp_build_tgsi_aos_context *bld,
+   const struct tgsi_full_instruction *inst,
+   const struct tgsi_opcode_info *info,
+   int *pc);
+
+void
+lp_emit_store_aos(
+   struct lp_build_tgsi_aos_context *bld,
+   const struct tgsi_full_instruction *inst,
+   unsigned index,
+   LLVMValueRef value);
+
+void lp_build_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data);
+
+LLVMValueRef
+lp_build_tgsi_inst_llvm_aos(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_instruction *inst);
+
+void
+lp_build_tgsi_intrinsic(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data);
+
+LLVMValueRef
+lp_build_emit_llvm(
+   struct lp_build_tgsi_context *bld_base,
+   unsigned tgsi_opcode,
+   struct lp_build_emit_data * emit_data);
+
+LLVMValueRef
+lp_build_emit_llvm_unary(
+   struct lp_build_tgsi_context *bld_base,
+   unsigned tgsi_opcode,
+   LLVMValueRef arg0);
+
+LLVMValueRef
+lp_build_emit_llvm_binary(
+   struct lp_build_tgsi_context *bld_base,
+   unsigned tgsi_opcode,
+   LLVMValueRef arg0,
+   LLVMValueRef arg1);
+
+LLVMValueRef
+lp_build_emit_llvm_ternary(
+   struct lp_build_tgsi_context *bld_base,
+   unsigned tgsi_opcode,
+   LLVMValueRef arg0,
+   LLVMValueRef arg1,
+   LLVMValueRef arg2);
+
+boolean
+lp_build_tgsi_inst_llvm(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_instruction *inst);
+
+LLVMValueRef
+lp_build_emit_fetch(
+   struct lp_build_tgsi_context *bld_base,
+   const struct tgsi_full_instruction *inst,
+   unsigned src_op,
+   const unsigned chan_index);
+
+boolean
+lp_build_tgsi_llvm(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_token *tokens);
+
 #endif /* LP_BLD_TGSI_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
new file mode 100644 (file)
index 0000000..9688acc
--- /dev/null
@@ -0,0 +1,1182 @@
+/**************************************************************************
+ * 
+ * Copyright 2011-2012 Advanced Micro Devices, Inc.
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * @file
+ * TGSI to LLVM IR translation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ * @author Tom Stellard <thomas.stellard@amd.com>
+ *
+ * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
+ * Brian Paul, and others.
+ */
+
+
+#include "lp_bld_tgsi_action.h"
+
+#include "lp_bld_tgsi.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_const.h"
+#include "lp_bld_gather.h"
+#include "lp_bld_logic.h"
+
+#include "tgsi/tgsi_exec.h"
+
+/* XXX: The CPU only defaults should be replaced by generic ones.  In most
+ * cases, the CPU defaults are just wrappers around a function in
+ * lp_bld_arit.c and these functions should be inlined here and the CPU
+ * generic code should be removed and placed elsewhere.
+ */
+
+/* Default actions */
+
+/* Generic fetch_arg functions */
+
+/**
+ * Fetch arguments for scalar unary opcodes: only the X channel of the first
+ * source is read, and the destination takes the type of that argument.
+ */
+static void scalar_unary_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* src0.x -- named channel constant, as in scalar_binary_fetch_args() */
+   emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
+                                            0, TGSI_CHAN_X);
+   emit_data->arg_count = 1;
+   emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
+}
+
+/**
+ * Fetch arguments for scalar binary opcodes: the X channels of the first
+ * two sources.  The destination takes the type of the first argument.
+ */
+static void scalar_binary_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef src0_x;
+   LLVMValueRef src1_x;
+
+   src0_x = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
+   src1_x = lp_build_emit_fetch(bld_base, emit_data->inst, 1, TGSI_CHAN_X);
+
+   emit_data->args[0] = src0_x;
+   emit_data->args[1] = src1_x;
+   emit_data->arg_count = 2;
+   emit_data->dst_type = LLVMTypeOf(src0_x);
+}
+
+/* TGSI_OPCODE_ADD: output = args[0] + args[1] as a floating-point add. */
+static void
+add_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   LLVMValueRef sum;
+
+   sum = LLVMBuildFAdd(builder, emit_data->args[0], emit_data->args[1], "");
+   emit_data->output[emit_data->chan] = sum;
+}
+
+/* TGSI_OPCODE_ARR: emitted through the TGSI_OPCODE_ROUND action applied to
+ * args[0]. */
+static void
+arr_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef rounded;
+
+   rounded = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ROUND,
+                                      emit_data->args[0]);
+   emit_data->output[emit_data->chan] = rounded;
+}
+
+/* TGSI_OPCODE_CLAMP: output = MIN(MAX(args[0], args[1]), args[2]), built
+ * from the MAX and MIN actions. */
+static void
+clamp_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef lower_bounded;
+   LLVMValueRef clamped;
+
+   lower_bounded = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
+                                             emit_data->args[0],
+                                             emit_data->args[1]);
+   clamped = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MIN,
+                                       lower_bounded, emit_data->args[2]);
+   emit_data->output[emit_data->chan] = clamped;
+}
+
+/* DP* Helper
+ *
+ * Fetch the first dp_components channels of src0 followed by the same
+ * channels of src1 into consecutive emit_data->args slots, and set the
+ * destination type to the element type.  arg_count is not modified.
+ */
+
+static void
+dp_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data,
+   unsigned dp_components)
+{
+   unsigned src, chan;
+   unsigned slot = 0;
+
+   for (src = 0; src < 2; src++) {
+      for (chan = 0; chan < dp_components; chan++) {
+         emit_data->args[slot] =
+            lp_build_emit_fetch(bld_base, emit_data->inst, src, chan);
+         slot++;
+      }
+   }
+
+   emit_data->dst_type = bld_base->base.elem_type;
+}
+
+/* TGSI_OPCODE_DP2 */
+
+/* Fetch src0.xy into args[0..1] and src1.xy into args[2..3]. */
+static void
+dp2_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   const unsigned num_components = 2;
+
+   dp_fetch_args(bld_base, emit_data, num_components);
+}
+
+/* output = src0.x * src1.x + src0.y * src1.y */
+static void
+dp2_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef prod_x, prod_y, sum;
+
+   prod_x = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                      emit_data->args[0] /* src0.x */,
+                                      emit_data->args[2] /* src1.x */);
+   prod_y = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                      emit_data->args[1] /* src0.y */,
+                                      emit_data->args[3] /* src1.y */);
+   sum = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, prod_x, prod_y);
+
+   emit_data->output[emit_data->chan] = sum;
+}
+
+static struct lp_build_tgsi_action dp2_action = {
+   .fetch_args = dp2_fetch_args,
+   .emit = dp2_emit
+};
+
+/* TGSI_OPCODE_DP2A
+ *
+ * Argument layout: dp_fetch_args() with two components fills args[0..3]
+ * (src0.x, src0.y, src1.x, src1.y); src2.x is stored in args[5].  args[4]
+ * is not written here.
+ */
+static void
+dp2a_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   dp_fetch_args(bld_base, emit_data, 2);
+   emit_data->args[5] = lp_build_emit_fetch(bld_base, emit_data->inst,
+                                            2, TGSI_CHAN_X);
+}
+/* The DP2 action is run on the same emit_data.  lp_build_emit_llvm()
+ * recomputes emit_data->dst_type and sets emit_data->chan to 0, so the final
+ * sum below is stored in output[0]. */
+static void
+dp2a_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef tmp;
+   tmp = lp_build_emit_llvm(bld_base, TGSI_OPCODE_DP2, emit_data);
+   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD,
+                                    emit_data->args[5], tmp);
+}
+
+static struct lp_build_tgsi_action dp2a_action = {
+   .fetch_args = dp2a_fetch_args,
+   .emit = dp2a_emit
+};
+
+/* TGSI_OPCODE_DP3 */
+static void
+dp3_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   dp_fetch_args(bld_base, emit_data, 3);
+}
+
+static void
+dp3_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef prod_x, prod_y, prod_z, sum_xy;
+
+   /* Operand layout: args[0..2] = src0.xyz, args[3..5] = src1.xyz. */
+   prod_x = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                      emit_data->args[0], emit_data->args[3]);
+   prod_y = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                      emit_data->args[1], emit_data->args[4]);
+
+   /* The y product is passed as the first ADD operand. */
+   sum_xy = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD,
+                                      prod_y, prod_x);
+
+   prod_z = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                      emit_data->args[2], emit_data->args[5]);
+
+   /* x*x' + y*y' + z*z' for the current channel. */
+   emit_data->output[emit_data->chan] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, sum_xy, prod_z);
+}
+
+/* Action entry installed for TGSI_OPCODE_DP3 by lp_set_default_actions(). */
+static struct lp_build_tgsi_action dp3_action = {
+   .fetch_args = dp3_fetch_args,
+   .emit = dp3_emit
+};
+
+/* TGSI_OPCODE_DP4 */
+
+/* Gathers the DP4 operands by delegating to dp_fetch_args with a component
+ * count of 4; dp4_emit then reads src0.xyzw from args[0..3] and src1.xyzw
+ * from args[4..7]. */
+static void
+dp4_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   dp_fetch_args(bld_base, emit_data, 4);
+}
+
+static void
+dp4_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef sum, prod;
+   unsigned i;
+
+   /* Operand layout: args[0..3] = src0.xyzw, args[4..7] = src1.xyzw.
+    * Start with the x product and accumulate y, z and w in that order. */
+   sum = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                   emit_data->args[0], emit_data->args[4]);
+   for (i = 1; i < 4; i++) {
+      prod = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                       emit_data->args[i],
+                                       emit_data->args[i + 4]);
+      sum = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, sum, prod);
+   }
+
+   emit_data->output[emit_data->chan] = sum;
+}
+
+/* Action entry installed for TGSI_OPCODE_DP4 by lp_set_default_actions(). */
+static struct lp_build_tgsi_action dp4_action = {
+   .fetch_args = dp4_fetch_args,
+   .emit = dp4_emit
+};
+
+/* TGSI_OPCODE_DPH */
+static void
+dph_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef one;
+
+   /* Same operand layout as DP4 ... */
+   dp_fetch_args(bld_base, emit_data, 4);
+
+   /* ... except that src0.w (slot 3) is replaced by the constant one, so
+    * the dp4_emit code used by dph_action multiplies src1.w by one. */
+   one = bld_base->base.one;
+   emit_data->args[3] = one;
+}
+
+/* Action entry installed for TGSI_OPCODE_DPH by lp_set_default_actions().
+ * It reuses dp4_emit; dph_fetch_args supplies one in place of src0.w.
+ * NOTE(review): unlike the dp2/dp3/dp4 tables this one is const and not
+ * static -- confirm whether external linkage is intended. */
+const struct lp_build_tgsi_action dph_action = {
+   .fetch_args = dph_fetch_args,
+   .emit = dp4_emit
+};
+
+/* TGSI_OPCODE_DST */
+static void
+dst_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef src0_y, src0_z, src1_y, src1_w;
+
+   /* Only the four components that dst_emit reads are fetched. */
+   src0_y = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
+   src0_z = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
+   src1_y = lp_build_emit_fetch(bld_base, emit_data->inst, 1, TGSI_CHAN_Y);
+   src1_w = lp_build_emit_fetch(bld_base, emit_data->inst, 1, TGSI_CHAN_W);
+
+   emit_data->args[0] = src0_y;
+   emit_data->args[1] = src0_z;
+   emit_data->args[2] = src1_y;
+   emit_data->args[3] = src1_w;
+}
+
+static void
+dst_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Slots as filled by dst_fetch_args. */
+   LLVMValueRef src0_y = emit_data->args[0];
+   LLVMValueRef src0_z = emit_data->args[1];
+   LLVMValueRef src1_y = emit_data->args[2];
+   LLVMValueRef src1_w = emit_data->args[3];
+
+   /* dst.x = 1 */
+   emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
+
+   /* dst.y = src0.y * src1.y */
+   emit_data->output[TGSI_CHAN_Y] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, src0_y, src1_y);
+
+   /* dst.z = src0.z */
+   emit_data->output[TGSI_CHAN_Z] = src0_z;
+
+   /* dst.w = src1.w */
+   emit_data->output[TGSI_CHAN_W] = src1_w;
+}
+
+/* Action entry installed for TGSI_OPCODE_DST by lp_set_default_actions(). */
+static struct lp_build_tgsi_action dst_action = {
+   .fetch_args = dst_fetch_args,
+   .emit = dst_emit
+};
+
+/* TGSI_OPCODE_END */
+/* Emit callback for TGSI_OPCODE_END: generates no LLVM IR and only sets the
+ * context's program counter to -1.
+ * NOTE(review): presumably -1 tells the instruction loop to stop -- confirm
+ * against the code that reads bld_base->pc. */
+static void
+end_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   bld_base->pc = -1;
+}
+
+/* TGSI_OPCODE_EXP */
+
+static void
+exp_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef src_x = emit_data->args[0];
+   LLVMValueRef floor_x, pow2_floor, fract_x, pow2_x;
+
+   /* floor(src0.x) */
+   floor_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, src_x);
+   /* 2 ^ floor(src0.x) */
+   pow2_floor = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_EX2, floor_x);
+   /* src0.x - floor(src0.x) */
+   fract_x = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB,
+                                       src_x, floor_x);
+   /* 2 ^ src0.x */
+   pow2_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_EX2, src_x);
+
+   emit_data->output[TGSI_CHAN_X] = pow2_floor;
+   emit_data->output[TGSI_CHAN_Y] = fract_x;
+   emit_data->output[TGSI_CHAN_Z] = pow2_x;
+   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
+}
+
+/* Action entry installed for TGSI_OPCODE_EXP by lp_set_default_actions().
+ * NOTE(review): const and not static, unlike most tables in this file --
+ * confirm whether external linkage is intended. */
+const struct lp_build_tgsi_action exp_action = {
+   .fetch_args = scalar_unary_fetch_args,
+   .emit = exp_emit
+};
+
+/* TGSI_OPCODE_FRC */
+
+static void
+frc_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef src = emit_data->args[0];
+   /* Fractional part: src - floor(src). */
+   LLVMValueRef floor_src = lp_build_emit_llvm_unary(bld_base,
+                                                     TGSI_OPCODE_FLR, src);
+
+   emit_data->output[emit_data->chan] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, src, floor_src);
+}
+
+/* TGSI_OPCODE_KIL */
+
+static void
+kil_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   const unsigned chans[4] = {
+      TGSI_CHAN_X, TGSI_CHAN_Y, TGSI_CHAN_Z, TGSI_CHAN_W
+   };
+   unsigned i;
+
+   /* args[0..3] = src0.x, src0.y, src0.z, src0.w */
+   for (i = 0; i < 4; i++) {
+      emit_data->args[i] = lp_build_emit_fetch(bld_base, emit_data->inst,
+                                               0, chans[i]);
+   }
+   emit_data->arg_count = 4;
+
+   /* The destination type is set to LLVM's void type. */
+   emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context);
+}
+
+/* TGSI_OPCODE_KILP */
+
+/* Fetch callback for TGSI_OPCODE_KILP: fetches no source operands and only
+ * sets the destination type to LLVM's void type. */
+static void
+kilp_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context);
+}
+
+/* TGSI_OPCODE_LIT */
+
+static void
+lit_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef src_x, src_y, src_w;
+
+   /* LIT reads only the x, y and w components of src0. */
+   src_x = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
+   src_y = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
+   src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
+
+   emit_data->args[0] = src_x;
+   emit_data->args[1] = src_y;
+   emit_data->args[2] = src_w;
+   emit_data->arg_count = 3;
+}
+
+static void
+lit_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Slots as filled by lit_fetch_args. */
+   LLVMValueRef src_x = emit_data->args[0];
+   LLVMValueRef src_y = emit_data->args[1];
+   LLVMValueRef src_w = emit_data->args[2];
+   LLVMValueRef zero = bld_base->base.zero;
+   LLVMValueRef clamped_y, power;
+
+   /* dst.x = 1 */
+   emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
+
+   /* dst.y = max(src0.x, 0) */
+   emit_data->output[TGSI_CHAN_Y] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX, src_x, zero);
+
+   /* dst.z = CMP(src0.x, 0, pow(max(src0.y, 0), src0.w)) */
+   clamped_y = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
+                                         src_y, zero);
+   power = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_POW,
+                                     clamped_y, src_w);
+   emit_data->output[TGSI_CHAN_Z] =
+      lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_CMP,
+                                 src_x, zero, power);
+
+   /* dst.w = 1 */
+   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
+}
+
+/* Action entry installed for TGSI_OPCODE_LIT by lp_set_default_actions(). */
+static struct lp_build_tgsi_action lit_action = {
+   .fetch_args = lit_fetch_args,
+   .emit = lit_emit
+};
+
+/* TGSI_OPCODE_LOG */
+
+static void
+log_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef abs_x, lg2_abs_x, floor_lg2, pow2_floor_lg2;
+
+   /* abs( src0.x ) */
+   abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
+                                    emit_data->args[0]);
+   /* lg2( abs( src0.x ) ) */
+   lg2_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_LG2, abs_x);
+   /* floor( lg2( abs( src0.x ) ) ) */
+   floor_lg2 = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, lg2_abs_x);
+   /* 2^( floor( lg2( abs( src0.x ) ) ) ) */
+   pow2_floor_lg2 = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_EX2,
+                                             floor_lg2);
+
+   /* dst.x = floor( lg2( abs( src0.x ) ) ) */
+   emit_data->output[TGSI_CHAN_X] = floor_lg2;
+
+   /* dst.y = abs( src0.x ) / 2^( floor( lg2( abs( src0.x ) ) ) ) */
+   emit_data->output[TGSI_CHAN_Y] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
+                                abs_x, pow2_floor_lg2);
+
+   /* dst.z = lg2( abs( src0.x ) ) */
+   emit_data->output[TGSI_CHAN_Z] = lg2_abs_x;
+
+   /* dst.w = 1 */
+   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
+}
+
+/* Action entry installed for TGSI_OPCODE_LOG by lp_set_default_actions(). */
+static struct lp_build_tgsi_action log_action = {
+   .fetch_args = scalar_unary_fetch_args,
+   .emit = log_emit
+};
+
+/* TGSI_OPCODE_LRP */
+
+static void
+lrp_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef weight = emit_data->args[0];
+   LLVMValueRef a = emit_data->args[1];
+   LLVMValueRef b = emit_data->args[2];
+   /* Linear interpolation computed as weight * (a - b) + b. */
+   LLVMValueRef delta = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB,
+                                                  a, b);
+
+   emit_data->output[emit_data->chan] =
+      lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD, weight, delta, b);
+}
+
+/* TGSI_OPCODE_MAD */
+
+static void
+mad_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Multiply-add built from the MUL and ADD actions: args[0] * args[1]
+    * followed by + args[2]. */
+   LLVMValueRef product = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
+                                                    emit_data->args[0],
+                                                    emit_data->args[1]);
+   LLVMValueRef addend = emit_data->args[2];
+
+   emit_data->output[emit_data->chan] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, product, addend);
+}
+
+/* TGSI_OPCODE_MOV */
+
+static void
+mov_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* A move emits nothing: the fetched operand is the result. */
+   LLVMValueRef src = emit_data->args[0];
+
+   emit_data->output[emit_data->chan] = src;
+}
+
+/* TGSI_OPCODE_MUL */
+static void
+mul_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Product of the two fetched operands via lp_build_mul. */
+   LLVMValueRef lhs = emit_data->args[0];
+   LLVMValueRef rhs = emit_data->args[1];
+   LLVMValueRef product = lp_build_mul(&bld_base->base, lhs, rhs);
+
+   emit_data->output[emit_data->chan] = product;
+}
+
+/* TGSI_OPCODE_POW */
+
+static void
+pow_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* args[0] raised to args[1] via lp_build_pow. */
+   LLVMValueRef base_val = emit_data->args[0];
+   LLVMValueRef exponent = emit_data->args[1];
+   LLVMValueRef power = lp_build_pow(&bld_base->base, base_val, exponent);
+
+   emit_data->output[emit_data->chan] = power;
+}
+
+/* Action entry installed for TGSI_OPCODE_POW by lp_set_default_actions(). */
+static struct lp_build_tgsi_action pow_action = {
+   .fetch_args = scalar_binary_fetch_args,
+   .emit = pow_emit
+};
+
+/* TGSI_OPCODE_RSQ */
+
+static void
+rsq_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* The operand is replaced in place by its absolute value, so the
+    * delegated emit below sees abs(src). */
+   emit_data->args[0] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
+                                                 emit_data->args[0]);
+
+   /* Without a reciprocal-square-root action in the context the result is
+    * left undefined. */
+   if (!bld_base->rsq_action.emit) {
+      emit_data->output[emit_data->chan] = bld_base->base.undef;
+      return;
+   }
+
+   bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data);
+}
+
+/* Action entry installed for TGSI_OPCODE_RSQ by lp_set_default_actions().
+ * rsq_emit forwards to bld_base->rsq_action after taking the absolute value.
+ * NOTE(review): const and not static, unlike most tables in this file --
+ * confirm whether external linkage is intended. */
+const struct lp_build_tgsi_action rsq_action = {
+   .fetch_args = scalar_unary_fetch_args,
+   .emit = rsq_emit
+
+};
+
+/* TGSI_OPCODE_SCS */
+static void
+scs_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef angle = emit_data->args[0];
+   LLVMValueRef cosine, sine;
+
+   cosine = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_COS, angle);
+   sine = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SIN, angle);
+
+   /* dst = (cos, sin, 0, 1) */
+   emit_data->output[TGSI_CHAN_X] = cosine;
+   emit_data->output[TGSI_CHAN_Y] = sine;
+   emit_data->output[TGSI_CHAN_Z] = bld_base->base.zero;
+   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
+}
+
+/* Action entry installed for TGSI_OPCODE_SCS by lp_set_default_actions().
+ * NOTE(review): const and not static, unlike most tables in this file --
+ * confirm whether external linkage is intended. */
+const struct lp_build_tgsi_action scs_action = {
+   .fetch_args = scalar_unary_fetch_args,
+   .emit = scs_emit
+};
+
+/* TGSI_OPCODE_SFL */
+
+static void
+sfl_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* The result is the constant zero, whatever the operands are. */
+   LLVMValueRef zero = bld_base->base.zero;
+
+   emit_data->output[emit_data->chan] = zero;
+}
+
+/* TGSI_OPCODE_STR */
+
+static void
+str_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* The result is the constant one, whatever the operands are. */
+   LLVMValueRef one = bld_base->base.one;
+
+   emit_data->output[emit_data->chan] = one;
+}
+
+/* TGSI_OPCODE_SUB */
+static void
+sub_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Plain LLVM floating-point subtraction: args[0] - args[1]. */
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   LLVMValueRef minuend = emit_data->args[0];
+   LLVMValueRef subtrahend = emit_data->args[1];
+
+   emit_data->output[emit_data->chan] =
+      LLVMBuildFSub(builder, minuend, subtrahend, "");
+}
+
+/* TGSI_OPCODE_XPD */
+
+/* Gathers the XPD operands by delegating to dp_fetch_args with a component
+ * count of 3; xpd_emit then reads src0.xyz from args[0..2] and src1.xyz from
+ * args[3..5]. */
+static void
+xpd_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   dp_fetch_args(bld_base, emit_data, 3);
+}
+
+/**
+ * (a * b) - (c * d)
+ */
+static LLVMValueRef
+xpd_helper(
+  struct lp_build_tgsi_context * bld_base,
+  LLVMValueRef a,
+  LLVMValueRef b,
+  LLVMValueRef c,
+  LLVMValueRef d)
+{
+   /* Two products, then their difference: one cross-product component. */
+   LLVMValueRef ab = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, a, b);
+   LLVMValueRef cd = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, c, d);
+
+   return lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, ab, cd);
+}
+
+static void
+xpd_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Slots as filled by xpd_fetch_args. */
+   LLVMValueRef s0x = emit_data->args[0];
+   LLVMValueRef s0y = emit_data->args[1];
+   LLVMValueRef s0z = emit_data->args[2];
+   LLVMValueRef s1x = emit_data->args[3];
+   LLVMValueRef s1y = emit_data->args[4];
+   LLVMValueRef s1z = emit_data->args[5];
+
+   /* dst.x = src0.y * src1.z - src1.y * src0.z */
+   emit_data->output[TGSI_CHAN_X] = xpd_helper(bld_base, s0y, s1z, s1y, s0z);
+
+   /* dst.y = src0.z * src1.x - src1.z * src0.x */
+   emit_data->output[TGSI_CHAN_Y] = xpd_helper(bld_base, s0z, s1x, s1z, s0x);
+
+   /* dst.z = src0.x * src1.y - src1.x * src0.y */
+   emit_data->output[TGSI_CHAN_Z] = xpd_helper(bld_base, s0x, s1y, s1x, s0y);
+
+   /* dst.w = 1 */
+   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
+}
+
+/* Action entry installed for TGSI_OPCODE_XPD by lp_set_default_actions().
+ * NOTE(review): const and not static, unlike most tables in this file --
+ * confirm whether external linkage is intended. */
+const struct lp_build_tgsi_action xpd_action = {
+   .fetch_args = xpd_fetch_args,
+   .emit = xpd_emit
+};
+
+/**
+ * Install the default per-opcode actions defined in this file into
+ * bld_base->op_actions.
+ */
+void
+lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
+{
+   struct lp_build_tgsi_action * ops = bld_base->op_actions;
+
+   /* Opcodes whose whole entry is replaced by a predefined action table. */
+   ops[TGSI_OPCODE_DP2] = dp2_action;
+   ops[TGSI_OPCODE_DP3] = dp3_action;
+   ops[TGSI_OPCODE_DP4] = dp4_action;
+   ops[TGSI_OPCODE_DP2A] = dp2a_action;
+   ops[TGSI_OPCODE_DPH] = dph_action;
+   ops[TGSI_OPCODE_DST] = dst_action;
+   ops[TGSI_OPCODE_EXP] = exp_action;
+   ops[TGSI_OPCODE_LIT] = lit_action;
+   ops[TGSI_OPCODE_LOG] = log_action;
+   ops[TGSI_OPCODE_RSQ] = rsq_action;
+   ops[TGSI_OPCODE_POW] = pow_action;
+   ops[TGSI_OPCODE_SCS] = scs_action;
+   ops[TGSI_OPCODE_XPD] = xpd_action;
+
+   /* Opcodes for which only the operand fetch callback is set here. */
+   ops[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args;
+   ops[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args;
+   ops[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args;
+   ops[TGSI_OPCODE_KIL].fetch_args = kil_fetch_args;
+   ops[TGSI_OPCODE_KILP].fetch_args = kilp_fetch_args;
+   ops[TGSI_OPCODE_RCP].fetch_args = scalar_unary_fetch_args;
+   ops[TGSI_OPCODE_SIN].fetch_args = scalar_unary_fetch_args;
+   ops[TGSI_OPCODE_LG2].fetch_args = scalar_unary_fetch_args;
+
+   /* Opcodes for which only the emit callback is set here. */
+   ops[TGSI_OPCODE_ADD].emit = add_emit;
+   ops[TGSI_OPCODE_ARR].emit = arr_emit;
+   ops[TGSI_OPCODE_CLAMP].emit = clamp_emit;
+   ops[TGSI_OPCODE_END].emit = end_emit;
+   ops[TGSI_OPCODE_FRC].emit = frc_emit;
+   ops[TGSI_OPCODE_LRP].emit = lrp_emit;
+   ops[TGSI_OPCODE_MAD].emit = mad_emit;
+   ops[TGSI_OPCODE_MOV].emit = mov_emit;
+   ops[TGSI_OPCODE_MUL].emit = mul_emit;
+   ops[TGSI_OPCODE_SFL].emit = sfl_emit;
+   ops[TGSI_OPCODE_STR].emit = str_emit;
+   ops[TGSI_OPCODE_SUB].emit = sub_emit;
+}
+
+/* CPU Only default actions */
+
+/* These actions are CPU only, because they could potentially output SSE
+ * intrinsics.
+ */
+
+/* TGSI_OPCODE_ABS (CPU Only)*/
+
+static void
+abs_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Absolute value of the fetched operand via lp_build_abs. */
+   LLVMValueRef src = emit_data->args[0];
+   LLVMValueRef result = lp_build_abs(&bld_base->base, src);
+
+   emit_data->output[emit_data->chan] = result;
+}
+
+/* TGSI_OPCODE_ADD (CPU Only) */
+static void
+add_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Sum of the two fetched operands via lp_build_add. */
+   LLVMValueRef lhs = emit_data->args[0];
+   LLVMValueRef rhs = emit_data->args[1];
+   LLVMValueRef sum = lp_build_add(&bld_base->base, lhs, rhs);
+
+   emit_data->output[emit_data->chan] = sum;
+}
+
+/* TGSI_OPCODE_CEIL (CPU Only) */
+/* Emits TGSI_OPCODE_CEIL for the current channel.  The operand must be
+ * rounded up with lp_build_ceil; lp_build_trunc rounds toward zero and gives
+ * the wrong result for positive non-integers (e.g. 1.5 -> 1 instead of 2). */
+static void
+ceil_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] = lp_build_ceil(&bld_base->base,
+                                                        emit_data->args[0]);
+}
+
+/* TGSI_OPCODE_CMP (CPU Only) */
+static void
+cmp_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef selector = emit_data->args[0];
+   LLVMValueRef if_negative = emit_data->args[1];
+   LLVMValueRef otherwise = emit_data->args[2];
+   /* Mask for selector < 0. */
+   LLVMValueRef is_negative = lp_build_cmp(&bld_base->base, PIPE_FUNC_LESS,
+                                           selector, bld_base->base.zero);
+
+   emit_data->output[emit_data->chan] =
+      lp_build_select(&bld_base->base, is_negative, if_negative, otherwise);
+}
+
+/* TGSI_OPCODE_CND (CPU Only) */
+static void
+cnd_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Selects args[0] where args[2] > 0.5 and args[1] elsewhere. */
+   LLVMValueRef half = lp_build_const_vec(bld_base->base.gallivm,
+                                          bld_base->base.type, 0.5);
+   LLVMValueRef above_half = lp_build_cmp(&bld_base->base, PIPE_FUNC_GREATER,
+                                          emit_data->args[2], half);
+
+   emit_data->output[emit_data->chan] =
+      lp_build_select(&bld_base->base, above_half,
+                      emit_data->args[0], emit_data->args[1]);
+}
+
+/* TGSI_OPCODE_COS (CPU Only) */
+static void
+cos_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Cosine of the fetched operand via lp_build_cos. */
+   LLVMValueRef src = emit_data->args[0];
+   LLVMValueRef result = lp_build_cos(&bld_base->base, src);
+
+   emit_data->output[emit_data->chan] = result;
+}
+
+/* TGSI_OPCODE_DIV (CPU Only) */
+static void
+div_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Quotient args[0] / args[1] via lp_build_div. */
+   LLVMValueRef numerator = emit_data->args[0];
+   LLVMValueRef denominator = emit_data->args[1];
+   LLVMValueRef quotient = lp_build_div(&bld_base->base,
+                                        numerator, denominator);
+
+   emit_data->output[emit_data->chan] = quotient;
+}
+
+/* TGSI_OPCODE_EX2 (CPU Only) */
+static void
+ex2_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Base-2 exponential of the fetched operand via lp_build_exp2. */
+   LLVMValueRef src = emit_data->args[0];
+   LLVMValueRef result = lp_build_exp2(&bld_base->base, src);
+
+   emit_data->output[emit_data->chan] = result;
+}
+
+/* TGSI_OPCODE_EXP (CPU Only) */
+static void
+exp_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef * out = emit_data->output;
+
+   /* lp_build_exp2_approx writes the x, y and z results directly into the
+    * output slots; w is the constant one. */
+   lp_build_exp2_approx(&bld_base->base, emit_data->args[0],
+                        &out[TGSI_CHAN_X], &out[TGSI_CHAN_Y],
+                        &out[TGSI_CHAN_Z]);
+   out[TGSI_CHAN_W] = bld_base->base.one;
+}
+
+/* TGSI_OPCODE_FLR (CPU Only) */
+
+static void
+flr_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Floor of the fetched operand via lp_build_floor. */
+   LLVMValueRef src = emit_data->args[0];
+   LLVMValueRef result = lp_build_floor(&bld_base->base, src);
+
+   emit_data->output[emit_data->chan] = result;
+}
+
+/* TGSI_OPCODE_LG2 (CPU Only) */
+static void
+lg2_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Base-2 logarithm of the fetched operand via lp_build_log2. */
+   LLVMValueRef src = emit_data->args[0];
+   LLVMValueRef result = lp_build_log2(&bld_base->base, src);
+
+   emit_data->output[emit_data->chan] = result;
+}
+
+/* TGSI_OPCODE_LOG (CPU Only) */
+static void
+log_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef src_x = emit_data->args[0];
+   LLVMValueRef exp_part;
+   LLVMValueRef floor_log2;
+   LLVMValueRef log2_x;
+
+   /* One helper call yields all three intermediate values.
+    * NOTE(review): unlike log_emit, no ABS is applied to src0.x before it
+    * is used here -- confirm this is intended. */
+   lp_build_log2_approx(&bld_base->base, src_x,
+                        &exp_part, &floor_log2, &log2_x);
+
+   emit_data->output[TGSI_CHAN_X] = floor_log2;
+   emit_data->output[TGSI_CHAN_Y] =
+      lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV, src_x, exp_part);
+   emit_data->output[TGSI_CHAN_Z] = log2_x;
+   emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
+}
+
+/* TGSI_OPCODE_MAX (CPU Only) */
+
+static void
+max_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Maximum of the two fetched operands via lp_build_max. */
+   LLVMValueRef lhs = emit_data->args[0];
+   LLVMValueRef rhs = emit_data->args[1];
+   LLVMValueRef result = lp_build_max(&bld_base->base, lhs, rhs);
+
+   emit_data->output[emit_data->chan] = result;
+}
+
+/* TGSI_OPCODE_MIN (CPU Only) */
+static void
+min_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Minimum of the two fetched operands via lp_build_min. */
+   LLVMValueRef lhs = emit_data->args[0];
+   LLVMValueRef rhs = emit_data->args[1];
+   LLVMValueRef result = lp_build_min(&bld_base->base, lhs, rhs);
+
+   emit_data->output[emit_data->chan] = result;
+}
+
+/* TGSI_OPCODE_POW (CPU Only) */
+static void
+pow_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* args[0] raised to args[1] via lp_build_pow. */
+   LLVMValueRef base_val = emit_data->args[0];
+   LLVMValueRef exponent = emit_data->args[1];
+   LLVMValueRef power = lp_build_pow(&bld_base->base, base_val, exponent);
+
+   emit_data->output[emit_data->chan] = power;
+}
+
+
+/* TGSI_OPCODE_RCP (CPU Only) */
+
+static void
+rcp_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Reciprocal of the fetched operand via lp_build_rcp. */
+   LLVMValueRef src = emit_data->args[0];
+   LLVMValueRef result = lp_build_rcp(&bld_base->base, src);
+
+   emit_data->output[emit_data->chan] = result;
+}
+
+/* Reciprocal square root (CPU Only) */
+
+/* This is not the same as TGSI_OPCODE_RSQ, which requires the argument to be
+ * greater than or equal to 0 */
+static void
+recip_sqrt_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* lp_build_rsqrt applied to the operand as given; rsq_emit has already
+    * replaced args[0] with its absolute value when it delegates here. */
+   LLVMValueRef src = emit_data->args[0];
+   LLVMValueRef result = lp_build_rsqrt(&bld_base->base, src);
+
+   emit_data->output[emit_data->chan] = result;
+}
+
+/* TGSI_OPCODE_ROUND (CPU Only) */
+static void
+round_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Rounding of the fetched operand via lp_build_round. */
+   LLVMValueRef src = emit_data->args[0];
+   LLVMValueRef result = lp_build_round(&bld_base->base, src);
+
+   emit_data->output[emit_data->chan] = result;
+}
+
+/* TGSI_OPCODE_SET Helper (CPU Only) */
+
+static void
+set_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data,
+   unsigned pipe_func)
+{
+   LLVMValueRef lhs = emit_data->args[0];
+   LLVMValueRef rhs = emit_data->args[1];
+   /* Compare the operands with pipe_func, then turn the comparison result
+    * into one (true) or zero (false). */
+   LLVMValueRef mask = lp_build_cmp(&bld_base->base, pipe_func, lhs, rhs);
+
+   emit_data->output[emit_data->chan] =
+      lp_build_select(&bld_base->base, mask,
+                      bld_base->base.one, bld_base->base.zero);
+}
+
+/* TGSI_OPCODE_SEQ (CPU Only) */
+
+/* Set-on-equal: one where args[0] == args[1], zero otherwise (see
+ * set_emit_cpu). */
+static void
+seq_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
+}
+
+/* TGSI_OPCODE_SGE (CPU Only) */
+/* Set-on-greater-or-equal: one where args[0] >= args[1], zero otherwise (see
+ * set_emit_cpu). */
+static void
+sge_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
+}
+
+/* TGSI_OPCODE_SGT (CPU Only)*/
+
+/* Set-on-greater-than: one where args[0] > args[1], zero otherwise (see
+ * set_emit_cpu). */
+static void
+sgt_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GREATER);
+}
+
+/* TGSI_OPCODE_SIN (CPU Only) */
+static void
+sin_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Sine of the fetched operand via lp_build_sin. */
+   LLVMValueRef src = emit_data->args[0];
+   LLVMValueRef result = lp_build_sin(&bld_base->base, src);
+
+   emit_data->output[emit_data->chan] = result;
+}
+
+/* TGSI_OPCODE_SLE (CPU Only) */
+/* Set-on-less-or-equal: one where args[0] <= args[1], zero otherwise (see
+ * set_emit_cpu). */
+static void
+sle_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LEQUAL);
+}
+
+/* TGSI_OPCODE_SLT (CPU Only) */
+
+/* Set-on-less-than: one where args[0] < args[1], zero otherwise (see
+ * set_emit_cpu). */
+static void
+slt_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
+}
+
+/* TGSI_OPCODE_SNE (CPU Only) */
+
+/* Set-on-not-equal: one where args[0] != args[1], zero otherwise (see
+ * set_emit_cpu). */
+static void
+sne_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
+}
+
+/* TGSI_OPCODE_SSG (CPU Only) */
+
+static void
+ssg_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Sign of the fetched operand via lp_build_sgn. */
+   LLVMValueRef src = emit_data->args[0];
+   LLVMValueRef result = lp_build_sgn(&bld_base->base, src);
+
+   emit_data->output[emit_data->chan] = result;
+}
+
+/* TGSI_OPCODE_SUB (CPU Only) */
+
+static void
+sub_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Difference args[0] - args[1] via lp_build_sub. */
+   LLVMValueRef minuend = emit_data->args[0];
+   LLVMValueRef subtrahend = emit_data->args[1];
+   LLVMValueRef difference = lp_build_sub(&bld_base->base,
+                                          minuend, subtrahend);
+
+   emit_data->output[emit_data->chan] = difference;
+}
+
+/* TGSI_OPCODE_TRUNC (CPU Only) */
+
+static void
+trunc_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   /* Truncation of the fetched operand via lp_build_trunc. */
+   LLVMValueRef src = emit_data->args[0];
+   LLVMValueRef result = lp_build_trunc(&bld_base->base, src);
+
+   emit_data->output[emit_data->chan] = result;
+}
+
+/**
+ * Install the generic default actions, then override the emit callbacks
+ * listed below with the CPU-only versions defined in this file.
+ */
+void
+lp_set_default_actions_cpu(
+   struct lp_build_tgsi_context * bld_base)
+{
+   struct lp_build_tgsi_action * ops;
+
+   /* The generic defaults go in first so the assignments below win. */
+   lp_set_default_actions(bld_base);
+   ops = bld_base->op_actions;
+
+   ops[TGSI_OPCODE_ABS].emit = abs_emit_cpu;
+   ops[TGSI_OPCODE_ADD].emit = add_emit_cpu;
+   /* ARL shares the floor implementation with FLR. */
+   ops[TGSI_OPCODE_ARL].emit = flr_emit_cpu;
+   ops[TGSI_OPCODE_CEIL].emit = ceil_emit_cpu;
+   ops[TGSI_OPCODE_CND].emit = cnd_emit_cpu;
+   ops[TGSI_OPCODE_COS].emit = cos_emit_cpu;
+   ops[TGSI_OPCODE_CMP].emit = cmp_emit_cpu;
+   ops[TGSI_OPCODE_DIV].emit = div_emit_cpu;
+   ops[TGSI_OPCODE_EX2].emit = ex2_emit_cpu;
+   ops[TGSI_OPCODE_EXP].emit = exp_emit_cpu;
+   ops[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
+   ops[TGSI_OPCODE_LG2].emit = lg2_emit_cpu;
+   ops[TGSI_OPCODE_LOG].emit = log_emit_cpu;
+   ops[TGSI_OPCODE_MAX].emit = max_emit_cpu;
+   ops[TGSI_OPCODE_MIN].emit = min_emit_cpu;
+   ops[TGSI_OPCODE_POW].emit = pow_emit_cpu;
+   ops[TGSI_OPCODE_RCP].emit = rcp_emit_cpu;
+   ops[TGSI_OPCODE_ROUND].emit = round_emit_cpu;
+   ops[TGSI_OPCODE_SEQ].emit = seq_emit_cpu;
+   ops[TGSI_OPCODE_SGE].emit = sge_emit_cpu;
+   ops[TGSI_OPCODE_SGT].emit = sgt_emit_cpu;
+   ops[TGSI_OPCODE_SIN].emit = sin_emit_cpu;
+   ops[TGSI_OPCODE_SLE].emit = sle_emit_cpu;
+   ops[TGSI_OPCODE_SLT].emit = slt_emit_cpu;
+   ops[TGSI_OPCODE_SNE].emit = sne_emit_cpu;
+   ops[TGSI_OPCODE_SSG].emit = ssg_emit_cpu;
+   ops[TGSI_OPCODE_SUB].emit = sub_emit_cpu;
+   ops[TGSI_OPCODE_TRUNC].emit = trunc_emit_cpu;
+
+   /* The reciprocal square root that rsq_emit delegates to. */
+   bld_base->rsq_action.emit = recip_sqrt_emit_cpu;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
new file mode 100644 (file)
index 0000000..818ff6c
--- /dev/null
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2011-2012 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ *
+ * @author Tom Stellard <thomas.stellard@amd.com>
+ *
+ */
+
+
+#ifndef LP_BLD_TGSI_ACTION_H
+#define LP_BLD_TGSI_ACTION_H
+
+#include <llvm-c/Core.h>
+
+struct lp_build_tgsi_context;
+
+struct lp_build_emit_data {
+   /** Arguments that are passed to lp_build_tgsi_action::emit.  The
+    * order of the arguments should be as follows:
+    * SOA: s0.x, s0.y, s0.z, s0.w, s1.x, s1.y, s1.z, s1.w, s2.x, s2.y, s2.z, s2.w
+    * AOS: s0.xyzw, s1.xyzw, s2.xyzw
+    * TEXTURE Instructions: coord.xyzw
+    *
+    * Arguments should be packed into the args array.  For example, for an
+    * SOA instruction that reads s0.x and s1.x, args should look like this:
+    * args[0] = s0.x;
+    * args[1] = s1.x;
+    */
+   LLVMValueRef args[12];
+
+   /**
+    * Number of arguments in the args array.
+    */
+   unsigned arg_count;
+
+   /**
+    * The output type of the opcode.  This should be set in the
+    * lp_build_tgsi_action::fetch_args function.
+    */
+   LLVMTypeRef dst_type;
+
+   /** This is used by the lp_build_tgsi_action::fetch_args function to
+    * determine which channel to read from the opcode arguments.  It also
+    * specifies which index of the output array should be written to by
+    * the lp_build_tgsi_action::emit function.  However, this value is
+    * usually ignored by any opcodes that are not TGSI_OUTPUT_COMPONENTWISE.
+    */
+   unsigned chan;
+
+   /** The lp_build_tgsi_action::emit 'executes' the opcode and writes the
+    * results to this array.
+    */
+   LLVMValueRef output[4];
+
+   /**
+    * The current instruction that is being 'executed'.
+    */
+   const struct tgsi_full_instruction * inst;
+   const struct tgsi_opcode_info * info;
+};
+
+struct lp_build_tgsi_action
+{
+
+   /**
+    * This function is responsible for doing 2-3 things:
+    * 1. Fetching the instruction arguments into the emit_data->args array.
+    * 2. Setting the number of arguments in emit_data->arg_count.
+    * 3. Setting the destination type in emit_data->dst_type (usually only
+    *    necessary for opcodes that are TGSI_OUTPUT_COMPONENTWISE).
+    */
+   void (*fetch_args)(struct lp_build_tgsi_context *,
+                      struct lp_build_emit_data *);
+
+
+   /**
+    * This function is responsible for emitting LLVM IR for a TGSI opcode.
+    * It should store the values it generates in the emit_data->output array
+    * and for TGSI_OUTPUT_COMPONENTWISE and TGSI_OUTPUT_REPLICATE instructions
+    * (and possibly others depending on the specific implementation), it should
+    * make sure to store the values in the array slot indexed by emit_data->chan.
+    */
+   void (*emit)(const struct lp_build_tgsi_action *,
+                        struct lp_build_tgsi_context *,
+                        struct lp_build_emit_data *);
+
+   /**
+    * This variable can be used to store an intrinsic name, in case the TGSI
+    * opcode will be replaced by a target-specific intrinsic.  (There is a
+    * convenience function in lp_bld_tgsi.c called lp_build_tgsi_intrinsic()
+    * that can be assigned to lp_build_tgsi_action::emit and used for
+    * generating intrinsics).
+    */
+   const char * intr_name;
+};
+
+/**
+ * This function initializes the bld_base->op_actions array with some
+ * generic opcode actions.
+ */
+void
+lp_set_default_actions(
+   struct lp_build_tgsi_context * bld_base);
+
+/**
+ * This function initializes the bld_base->op_actions array with some
+ * opcode actions that are intended only for use when generating
+ * instructions to be executed on a CPU.
+ */
+void
+lp_set_default_actions_cpu(
+   struct lp_build_tgsi_context * bld_base);
+
+#endif /* LP_BLD_TGSI_ACTION_H */
index 74b3b75..53013f7 100644 (file)
 #include "lp_bld_flow.h"
 #include "lp_bld_quad.h"
 #include "lp_bld_tgsi.h"
-#include "lp_bld_limits.h"
 #include "lp_bld_debug.h"
 
 
-#define LP_MAX_INSTRUCTIONS 256
-
-
-struct lp_build_tgsi_aos_context
-{
-   struct lp_build_context base;
-
-   /* Builder for integer masks and indices */
-   struct lp_build_context int_bld;
-
-   /*
-    * AoS swizzle used:
-    * - swizzles[0] = red index
-    * - swizzles[1] = green index
-    * - swizzles[2] = blue index
-    * - swizzles[3] = alpha index
-    */
-   unsigned char swizzles[4];
-   unsigned char inv_swizzles[4];
-
-   LLVMValueRef consts_ptr;
-   const LLVMValueRef *inputs;
-   LLVMValueRef *outputs;
-
-   struct lp_build_sampler_aos *sampler;
-
-   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
-   LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
-   LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
-   LLVMValueRef preds[LP_MAX_TGSI_PREDS];
-
-   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
-    * set in the indirect_files field.
-    * The temps[] array above is unused then.
-    */
-   LLVMValueRef temps_array;
-
-   /** bitmask indicating which register files are accessed indirectly */
-   unsigned indirect_files;
-
-   struct tgsi_full_instruction *instructions;
-   uint max_instructions;
-};
-
-
 /**
  * Wrapper around lp_build_swizzle_aos which translates swizzles to another 
  * ordering.
  */
 static LLVMValueRef
-swizzle_aos(struct lp_build_tgsi_aos_context *bld,
+swizzle_aos(struct lp_build_tgsi_context *bld_base,
             LLVMValueRef a,
             unsigned swizzle_x,
             unsigned swizzle_y,
@@ -117,6 +71,7 @@ swizzle_aos(struct lp_build_tgsi_aos_context *bld,
             unsigned swizzle_w)
 {
    unsigned char swizzles[4];
+   struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
 
    assert(swizzle_x < 4);
    assert(swizzle_y < 4);
@@ -128,7 +83,7 @@ swizzle_aos(struct lp_build_tgsi_aos_context *bld,
    swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
    swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
 
-   return lp_build_swizzle_aos(&bld->base, a, swizzles);
+   return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
 }
 
 
@@ -138,149 +93,133 @@ swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
                    unsigned chan)
 {
    chan = bld->swizzles[chan];
-   return lp_build_swizzle_scalar_aos(&bld->base, a, chan);
+   return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan);
 }
 
 
-/**
- * Register fetch.
- */
 static LLVMValueRef
-emit_fetch(
-   struct lp_build_tgsi_aos_context *bld,
-   const struct tgsi_full_instruction *inst,
-   unsigned src_op)
+emit_fetch_constant(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_src_register * reg,
+   const unsigned swizzle)
 {
-   LLVMBuilderRef builder = bld->base.gallivm->builder;
-   struct lp_type type = bld->base.type;
-   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
+   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   struct lp_type type = bld_base->base.type;
    LLVMValueRef res;
    unsigned chan;
 
    assert(!reg->Register.Indirect);
 
    /*
-    * Fetch the from the register file.
+    * Get the constants components
     */
 
-   switch (reg->Register.File) {
-   case TGSI_FILE_CONSTANT:
-      /*
-       * Get the constants components
-       */
-
-      res = bld->base.undef;
-      for (chan = 0; chan < 4; ++chan) {
-         LLVMValueRef index;
-         LLVMValueRef scalar_ptr;
-         LLVMValueRef scalar;
-         LLVMValueRef swizzle;
-
-         index = lp_build_const_int32(bld->base.gallivm, reg->Register.Index * 4 + chan);
-
-         scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
-                                   &index, 1, "");
+   res = bld->bld_base.base.undef;
+   for (chan = 0; chan < 4; ++chan) {
+      LLVMValueRef index;
+      LLVMValueRef scalar_ptr;
+      LLVMValueRef scalar;
+      LLVMValueRef swizzle;
 
-         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+      index = lp_build_const_int32(bld->bld_base.base.gallivm,
+                                   reg->Register.Index * 4 + chan);
 
-         lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
+      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
 
-         /*
-          * NOTE: constants array is always assumed to be RGBA
-          */
+      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
 
-         swizzle = lp_build_const_int32(bld->base.gallivm, bld->swizzles[chan]);
-
-         res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
-      }
+      lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
 
       /*
-       * Broadcast the first quaternion to all others.
-       *
-       * XXX: could be factored into a reusable function.
+       * NOTE: constants array is always assumed to be RGBA
        */
 
-      if (type.length > 4) {
-         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
-         unsigned i;
+      swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
+                                     bld->swizzles[chan]);
 
-         for (chan = 0; chan < 4; ++chan) {
-            shuffles[chan] = lp_build_const_int32(bld->base.gallivm, chan);
-         }
-
-         for (i = 4; i < type.length; ++i) {
-            shuffles[i] = shuffles[i % 4];
-         }
+      res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
+   }
 
-         res = LLVMBuildShuffleVector(builder,
-                                      res, bld->base.undef,
-                                      LLVMConstVector(shuffles, type.length),
-                                      "");
-      }
-      break;
+   /*
+    * Broadcast the first quaternion to all others.
+    *
+    * XXX: could be factored into a reusable function.
+    */
 
-   case TGSI_FILE_IMMEDIATE:
-      res = bld->immediates[reg->Register.Index];
-      assert(res);
-      break;
+   if (type.length > 4) {
+      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+      unsigned i;
 
-   case TGSI_FILE_INPUT:
-      res = bld->inputs[reg->Register.Index];
-      assert(res);
-      break;
-
-   case TGSI_FILE_TEMPORARY:
-      {
-         LLVMValueRef temp_ptr;
-         temp_ptr = bld->temps[reg->Register.Index];
-         res = LLVMBuildLoad(builder, temp_ptr, "");
-         if (!res)
-            return bld->base.undef;
+      for (chan = 0; chan < 4; ++chan) {
+         shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
       }
-      break;
 
-   default:
-      assert(0 && "invalid src register in emit_fetch()");
-      return bld->base.undef;
-   }
-
-   /*
-    * Apply sign modifier.
-    */
+      for (i = 4; i < type.length; ++i) {
+         shuffles[i] = shuffles[i % 4];
+      }
 
-   if (reg->Register.Absolute) {
-      res = lp_build_abs(&bld->base, res);
+      res = LLVMBuildShuffleVector(builder,
+                                   res, bld->bld_base.base.undef,
+                                   LLVMConstVector(shuffles, type.length),
+                                   "");
    }
+   return res;
+}
 
-   if(reg->Register.Negate) {
-      res = lp_build_negate(&bld->base, res);
-   }
+static LLVMValueRef
+emit_fetch_immediate(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_src_register * reg,
+   const unsigned swizzle)
+{
+   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
+   LLVMValueRef res = bld->immediates[reg->Register.Index];
+   assert(res);
+   return res;
+}
 
-   /*
-    * Swizzle the argument
-    */
+static LLVMValueRef
+emit_fetch_input(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_src_register * reg,
+   const unsigned swizzle)
+{
+   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
+   LLVMValueRef res = bld->inputs[reg->Register.Index];
+   assert(!reg->Register.Indirect);
+   assert(res);
+   return res;
+}
 
-   res = swizzle_aos(bld, res,
-                     reg->Register.SwizzleX,
-                     reg->Register.SwizzleY,
-                     reg->Register.SwizzleZ,
-                     reg->Register.SwizzleW);
+static LLVMValueRef
+emit_fetch_temporary(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_src_register * reg,
+   const unsigned swizzle)
+{
+   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
+   LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
+   assert(!reg->Register.Indirect);
+   if (!res)
+      return bld->bld_base.base.undef;
 
    return res;
 }
 
-
 /**
  * Register store.
  */
-static void
-emit_store(
+void
+lp_emit_store_aos(
    struct lp_build_tgsi_aos_context *bld,
    const struct tgsi_full_instruction *inst,
    unsigned index,
    LLVMValueRef value)
 {
-   LLVMBuilderRef builder = bld->base.gallivm->builder;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
    LLVMValueRef mask = NULL;
    LLVMValueRef ptr;
@@ -294,13 +233,13 @@ emit_store(
       break;
 
    case TGSI_SAT_ZERO_ONE:
-      value = lp_build_max(&bld->base, value, bld->base.zero);
-      value = lp_build_min(&bld->base, value, bld->base.one);
+      value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
+      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
       break;
 
    case TGSI_SAT_MINUS_PLUS_ONE:
-      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
-      value = lp_build_min(&bld->base, value, bld->base.one);
+      value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
+      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
       break;
 
    default:
@@ -335,6 +274,8 @@ emit_store(
       return;
    }
 
+   if (!ptr)
+      return;
    /*
     * Predicate
     */
@@ -350,17 +291,17 @@ emit_store(
       /*
        * Convert the value to an integer mask.
        */
-      pred = lp_build_compare(bld->base.gallivm,
-                               bld->base.type,
+      pred = lp_build_compare(bld->bld_base.base.gallivm,
+                               bld->bld_base.base.type,
                                PIPE_FUNC_NOTEQUAL,
                                pred,
-                               bld->base.zero);
+                               bld->bld_base.base.zero);
 
       if (inst->Predicate.Negate) {
          pred = LLVMBuildNot(builder, pred, "");
       }
 
-      pred = swizzle_aos(bld, pred,
+      pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
                          inst->Predicate.SwizzleX,
                          inst->Predicate.SwizzleY,
                          inst->Predicate.SwizzleZ,
@@ -380,7 +321,7 @@ emit_store(
    if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
       LLVMValueRef writemask;
 
-      writemask = lp_build_const_mask_aos(bld->base.gallivm, bld->base.type,
+      writemask = lp_build_const_mask_aos(bld->bld_base.base.gallivm, bld->bld_base.base.type,
                                           reg->Register.WriteMask);
 
       if (mask) {
@@ -394,7 +335,7 @@ emit_store(
       LLVMValueRef orig_value;
 
       orig_value = LLVMBuildLoad(builder, ptr, "");
-      value = lp_build_select(&bld->base,
+      value = lp_build_select(&bld->bld_base.base,
                               mask, value, orig_value);
    }
 
@@ -419,44 +360,44 @@ emit_tex(struct lp_build_tgsi_aos_context *bld,
 
    if (!bld->sampler) {
       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
-      return bld->base.undef;
+      return bld->bld_base.base.undef;
    }
 
    target = inst->Texture.Texture;
 
-   coords = emit_fetch( bld, inst, 0 );
+   coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
 
    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
-      ddx = emit_fetch( bld, inst, 1 );
-      ddy = emit_fetch( bld, inst, 2 );
+      ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
+      ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
       unit = inst->Src[3].Register.Index;
    }  else {
 #if 0
-      ddx = lp_build_ddx( &bld->base, coords );
-      ddy = lp_build_ddy( &bld->base, coords );
+      ddx = lp_build_ddx( &bld->bld_base.base, coords );
+      ddy = lp_build_ddy( &bld->bld_base.base, coords );
 #else
       /* TODO */
-      ddx = bld->base.one;
-      ddy = bld->base.one;
+      ddx = bld->bld_base.base.one;
+      ddy = bld->bld_base.base.one;
 #endif
       unit = inst->Src[1].Register.Index;
    }
 
    return bld->sampler->emit_fetch_texel(bld->sampler,
-                                         &bld->base,
+                                         &bld->bld_base.base,
                                          target, unit,
                                          coords, ddx, ddy,
                                          modifier);
 }
 
 
-static void
-emit_declaration(
+void
+lp_emit_declaration_aos(
    struct lp_build_tgsi_aos_context *bld,
    const struct tgsi_full_declaration *decl)
 {
-   struct gallivm_state *gallivm = bld->base.gallivm;
-   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.gallivm, bld->base.type);
+   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+   LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
 
    unsigned first = decl->Range.First;
    unsigned last = decl->Range.Last;
@@ -468,7 +409,7 @@ emit_declaration(
          assert(idx < LP_MAX_TGSI_TEMPS);
          if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
             LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
-            bld->temps_array = lp_build_array_alloca(bld->base.gallivm,
+            bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
                                                      vec_type, array_size, "");
          } else {
             bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
@@ -501,8 +442,8 @@ emit_declaration(
  * Emit LLVM for one TGSI instruction.
  * \param return TRUE for success, FALSE otherwise
  */
-static boolean
-emit_instruction(
+boolean
+lp_emit_instruction_aos(
    struct lp_build_tgsi_aos_context *bld,
    const struct tgsi_full_instruction *inst,
    const struct tgsi_opcode_info *info,
@@ -527,17 +468,17 @@ emit_instruction(
 
    assert(info->num_dst <= 1);
    if (info->num_dst) {
-      dst0 = bld->base.undef;
+      dst0 = bld->bld_base.base.undef;
    }
 
    switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_ARL:
-      src0 = emit_fetch(bld, inst, 0);
-      dst0 = lp_build_floor(&bld->base, src0);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      dst0 = lp_build_floor(&bld->bld_base.base, src0);
       break;
 
    case TGSI_OPCODE_MOV:
-      dst0 = emit_fetch(bld, inst, 0);
+      dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
       break;
 
    case TGSI_OPCODE_LIT:
@@ -545,15 +486,15 @@ emit_instruction(
 
    case TGSI_OPCODE_RCP:
    /* TGSI_OPCODE_RECIP */
-      src0 = emit_fetch(bld, inst, 0);
-      dst0 = lp_build_rcp(&bld->base, src0);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      dst0 = lp_build_rcp(&bld->bld_base.base, src0);
       break;
 
    case TGSI_OPCODE_RSQ:
    /* TGSI_OPCODE_RECIPSQRT */
-      src0 = emit_fetch(bld, inst, 0);
-      tmp0 = lp_build_abs(&bld->base, src0);
-      dst0 = lp_build_rsqrt(&bld->base, tmp0);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
+      dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
       break;
 
    case TGSI_OPCODE_EXP:
@@ -563,15 +504,15 @@ emit_instruction(
       return FALSE;
 
    case TGSI_OPCODE_MUL:
-      src0 = emit_fetch(bld, inst, 0);
-      src1 = emit_fetch(bld, inst, 1);
-      dst0 = lp_build_mul(&bld->base, src0, src1);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+      dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
       break;
 
    case TGSI_OPCODE_ADD:
-      src0 = emit_fetch(bld, inst, 0);
-      src1 = emit_fetch(bld, inst, 1);
-      dst0 = lp_build_add(&bld->base, src0, src1);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+      dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
       break;
 
    case TGSI_OPCODE_DP3:
@@ -586,121 +527,116 @@ emit_instruction(
       return FALSE;
 
    case TGSI_OPCODE_MIN:
-      src0 = emit_fetch(bld, inst, 0);
-      src1 = emit_fetch(bld, inst, 1);
-      dst0 = lp_build_max(&bld->base, src0, src1);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
       break;
 
    case TGSI_OPCODE_MAX:
-      src0 = emit_fetch(bld, inst, 0);
-      src1 = emit_fetch(bld, inst, 1);
-      dst0 = lp_build_max(&bld->base, src0, src1);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
       break;
 
    case TGSI_OPCODE_SLT:
    /* TGSI_OPCODE_SETLT */
-      src0 = emit_fetch(bld, inst, 0);
-      src1 = emit_fetch(bld, inst, 1);
-      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1);
-      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
+      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
       break;
 
    case TGSI_OPCODE_SGE:
    /* TGSI_OPCODE_SETGE */
-      src0 = emit_fetch(bld, inst, 0);
-      src1 = emit_fetch(bld, inst, 1);
-      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1);
-      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
+      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
       break;
 
    case TGSI_OPCODE_MAD:
    /* TGSI_OPCODE_MADD */
-      src0 = emit_fetch(bld, inst, 0);
-      src1 = emit_fetch(bld, inst, 1);
-      src2 = emit_fetch(bld, inst, 2);
-      tmp0 = lp_build_mul(&bld->base, src0, src1);
-      dst0 = lp_build_add(&bld->base, tmp0, src2);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
+      tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
+      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
       break;
 
    case TGSI_OPCODE_SUB:
-      src0 = emit_fetch(bld, inst, 0);
-      src1 = emit_fetch(bld, inst, 1);
-      dst0 = lp_build_sub(&bld->base, src0, src1);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+      dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
       break;
 
    case TGSI_OPCODE_LRP:
-      src0 = emit_fetch(bld, inst, 0);
-      src1 = emit_fetch(bld, inst, 1);
-      src2 = emit_fetch(bld, inst, 2);
-      tmp0 = lp_build_sub(&bld->base, src1, src2);
-      tmp0 = lp_build_mul(&bld->base, src0, tmp0);
-      dst0 = lp_build_add(&bld->base, tmp0, src2);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
+      tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
+      tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
+      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
       break;
 
    case TGSI_OPCODE_CND:
-      src0 = emit_fetch(bld, inst, 0);
-      src1 = emit_fetch(bld, inst, 1);
-      src2 = emit_fetch(bld, inst, 2);
-      tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5);
-      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1);
-      dst0 = lp_build_select(&bld->base, tmp0, src0, src1);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
+      tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5);
+      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1);
+      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
       break;
 
    case TGSI_OPCODE_DP2A:
       return FALSE;
 
    case TGSI_OPCODE_FRC:
-      src0 = emit_fetch(bld, inst, 0);
-      tmp0 = lp_build_floor(&bld->base, src0);
-      dst0 = lp_build_sub(&bld->base, src0, tmp0);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      tmp0 = lp_build_floor(&bld->bld_base.base, src0);
+      dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
       break;
 
    case TGSI_OPCODE_CLAMP:
-      src0 = emit_fetch(bld, inst, 0);
-      src1 = emit_fetch(bld, inst, 1);
-      src2 = emit_fetch(bld, inst, 2);
-      tmp0 = lp_build_max(&bld->base, src0, src1);
-      dst0 = lp_build_min(&bld->base, tmp0, src2);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
+      tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
+      dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
       break;
 
    case TGSI_OPCODE_FLR:
-      src0 = emit_fetch(bld, inst, 0);
-      dst0 = lp_build_floor(&bld->base, src0);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      dst0 = lp_build_floor(&bld->bld_base.base, src0);
       break;
 
    case TGSI_OPCODE_ROUND:
-      src0 = emit_fetch(bld, inst, 0);
-      dst0 = lp_build_round(&bld->base, src0);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      dst0 = lp_build_round(&bld->bld_base.base, src0);
       break;
 
    case TGSI_OPCODE_EX2:
-      src0 = emit_fetch(bld, inst, 0);
-      tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X);
-      dst0 = lp_build_exp2(&bld->base, tmp0);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X);
+      dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
       break;
 
    case TGSI_OPCODE_LG2:
-      src0 = emit_fetch(bld, inst, 0);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
-      dst0 = lp_build_log2(&bld->base, tmp0);
+      dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
       break;
 
    case TGSI_OPCODE_POW:
-      src0 = emit_fetch(bld, inst, 0);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
       src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
-      src1 = emit_fetch(bld, inst, 1);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
       src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
-      dst0 = lp_build_pow(&bld->base, src0, src1);
+      dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
       break;
 
    case TGSI_OPCODE_XPD:
       return FALSE;
 
-   case TGSI_OPCODE_ABS:
-      src0 = emit_fetch(bld, inst, 0);
-      dst0 = lp_build_abs(&bld->base, src0);
-      break;
-
    case TGSI_OPCODE_RCC:
       /* deprecated? */
       assert(0);
@@ -710,9 +646,9 @@ emit_instruction(
       return FALSE;
 
    case TGSI_OPCODE_COS:
-      src0 = emit_fetch(bld, inst, 0);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
-      dst0 = lp_build_cos(&bld->base, tmp0);
+      dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
       break;
 
    case TGSI_OPCODE_DDX:
@@ -748,45 +684,45 @@ emit_instruction(
       return FALSE;
 
    case TGSI_OPCODE_SEQ:
-      src0 = emit_fetch(bld, inst, 0);
-      src1 = emit_fetch(bld, inst, 1);
-      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1);
-      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
+      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
       break;
 
    case TGSI_OPCODE_SFL:
-      dst0 = bld->base.zero;
+      dst0 = bld->bld_base.base.zero;
       break;
 
    case TGSI_OPCODE_SGT:
-      src0 = emit_fetch(bld, inst, 0);
-      src1 = emit_fetch(bld, inst, 1);
-      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1);
-      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
+      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
       break;
 
    case TGSI_OPCODE_SIN:
-      src0 = emit_fetch(bld, inst, 0);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
-      dst0 = lp_build_sin(&bld->base, tmp0);
+      dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
       break;
 
    case TGSI_OPCODE_SLE:
-      src0 = emit_fetch(bld, inst, 0);
-      src1 = emit_fetch(bld, inst, 1);
-      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1);
-      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
+      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
       break;
 
    case TGSI_OPCODE_SNE:
-      src0 = emit_fetch(bld, inst, 0);
-      src1 = emit_fetch(bld, inst, 1);
-      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1);
-      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
+      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
       break;
 
    case TGSI_OPCODE_STR:
-      dst0 = bld->base.one;
+      dst0 = bld->bld_base.base.one;
       break;
 
    case TGSI_OPCODE_TEX:
@@ -834,8 +770,8 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_ARR:
-      src0 = emit_fetch(bld, inst, 0);
-      dst0 = lp_build_round(&bld->base, src0);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      dst0 = lp_build_round(&bld->bld_base.base, src0);
       break;
 
    case TGSI_OPCODE_BRA:
@@ -856,16 +792,16 @@ emit_instruction(
 
    case TGSI_OPCODE_SSG:
    /* TGSI_OPCODE_SGN */
-      tmp0 = emit_fetch(bld, inst, 0);
-      dst0 = lp_build_sgn(&bld->base, tmp0);
+      tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
       break;
 
    case TGSI_OPCODE_CMP:
-      src0 = emit_fetch(bld, inst, 0);
-      src1 = emit_fetch(bld, inst, 1);
-      src2 = emit_fetch(bld, inst, 2);
-      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero);
-      dst0 = lp_build_select(&bld->base, tmp0, src1, src2);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
+      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
+      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
+      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
       break;
 
    case TGSI_OPCODE_SCS:
@@ -934,8 +870,8 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_CEIL:
-      src0 = emit_fetch(bld, inst, 0);
-      dst0 = lp_build_ceil(&bld->base, src0);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      dst0 = lp_build_ceil(&bld->bld_base.base, src0);
       break;
 
    case TGSI_OPCODE_I2F:
@@ -951,8 +887,8 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_TRUNC:
-      src0 = emit_fetch(bld, inst, 0);
-      dst0 = lp_build_trunc(&bld->base, src0);
+      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
+      dst0 = lp_build_trunc(&bld->bld_base.base, src0);
       break;
 
    case TGSI_OPCODE_SHL:
@@ -1028,7 +964,7 @@ emit_instruction(
    }
    
    if (info->num_dst) {
-      emit_store(bld, inst, 0, dst0);
+      lp_emit_store_aos(bld, inst, 0, dst0);
    }
 
    return TRUE;
@@ -1049,13 +985,12 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm,
    struct lp_build_tgsi_aos_context bld;
    struct tgsi_parse_context parse;
    uint num_immediates = 0;
-   uint num_instructions = 0;
    unsigned chan;
    int pc = 0;
 
    /* Setup build context */
    memset(&bld, 0, sizeof bld);
-   lp_build_context_init(&bld.base, gallivm, type);
+   lp_build_context_init(&bld.bld_base.base, gallivm, type);
    lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
 
    for (chan = 0; chan < 4; ++chan) {
@@ -1068,11 +1003,18 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm,
    bld.consts_ptr = consts_ptr;
    bld.sampler = sampler;
    bld.indirect_files = info->indirect_files;
-   bld.instructions = (struct tgsi_full_instruction *)
-                      MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction));
-   bld.max_instructions = LP_MAX_INSTRUCTIONS;
+   bld.bld_base.emit_swizzle = swizzle_aos;
+   bld.bld_base.info = info;
+
+   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
+   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
+   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
+   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
 
-   if (!bld.instructions) {
+   /* Set opcode actions */
+   lp_set_default_actions_cpu(&bld.bld_base);
+
+   if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
       return;
    }
 
@@ -1084,33 +1026,13 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm,
       switch(parse.FullToken.Token.Type) {
       case TGSI_TOKEN_TYPE_DECLARATION:
          /* Inputs already interpolated */
-         emit_declaration(&bld, &parse.FullToken.FullDeclaration);
+         lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
          break;
 
       case TGSI_TOKEN_TYPE_INSTRUCTION:
-         {
-            /* save expanded instruction */
-            if (num_instructions == bld.max_instructions) {
-               struct tgsi_full_instruction *instructions;
-               instructions = REALLOC(bld.instructions,
-                                      bld.max_instructions
-                                      * sizeof(struct tgsi_full_instruction),
-                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
-                                      * sizeof(struct tgsi_full_instruction));
-               if (!instructions) {
-                  break;
-               }
-               bld.instructions = instructions;
-               bld.max_instructions += LP_MAX_INSTRUCTIONS;
-            }
-
-            memcpy(bld.instructions + num_instructions,
-                   &parse.FullToken.FullInstruction,
-                   sizeof(bld.instructions[0]));
-
-            num_instructions++;
-         }
-
+         /* save expanded instruction */
+         lp_bld_tgsi_add_instruction(&bld.bld_base,
+                                     &parse.FullToken.FullInstruction);
          break;
 
       case TGSI_TOKEN_TYPE_IMMEDIATE:
@@ -1144,10 +1066,10 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm,
    }
 
    while (pc != -1) {
-      struct tgsi_full_instruction *instr = bld.instructions + pc;
+      struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
       const struct tgsi_opcode_info *opcode_info =
          tgsi_get_opcode_info(instr->Instruction.Opcode);
-      if (!emit_instruction(&bld, instr, opcode_info, &pc))
+      if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
          _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
                        opcode_info->mnemonic);
    }
@@ -1168,6 +1090,5 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm,
       LLVMDumpModule(module);
    }
 
-   FREE(bld.instructions);
 }
 
index 1ad0b74..2be4195 100644 (file)
@@ -47,6 +47,7 @@
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi/tgsi_scan.h"
+#include "lp_bld_tgsi_action.h"
 #include "lp_bld_type.h"
 #include "lp_bld_const.h"
 #include "lp_bld_arit.h"
 #include "lp_bld_printf.h"
 
 
-#define NUM_CHANNELS 4
-
-#define LP_MAX_INSTRUCTIONS 256
-
-
-struct lp_exec_mask {
-   struct lp_build_context *bld;
-
-   boolean has_mask;
-
-   LLVMTypeRef int_vec_type;
-
-   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
-   int cond_stack_size;
-   LLVMValueRef cond_mask;
-
-   LLVMBasicBlockRef loop_block;
-   LLVMValueRef cont_mask;
-   LLVMValueRef break_mask;
-   LLVMValueRef break_var;
-   struct {
-      LLVMBasicBlockRef loop_block;
-      LLVMValueRef cont_mask;
-      LLVMValueRef break_mask;
-      LLVMValueRef break_var;
-   } loop_stack[LP_MAX_TGSI_NESTING];
-   int loop_stack_size;
-
-   LLVMValueRef ret_mask;
-   struct {
-      int pc;
-      LLVMValueRef ret_mask;
-   } call_stack[LP_MAX_TGSI_NESTING];
-   int call_stack_size;
-
-   LLVMValueRef exec_mask;
-};
-
-struct lp_build_tgsi_soa_context
-{
-   struct lp_build_context base;
-
-   /* Builder for vector integer masks and indices */
-   struct lp_build_context uint_bld;
-
-   /* Builder for scalar elements of shader's data type (float) */
-   struct lp_build_context elem_bld;
-
-   LLVMValueRef consts_ptr;
-   const LLVMValueRef *pos;
-   const LLVMValueRef (*inputs)[NUM_CHANNELS];
-   LLVMValueRef (*outputs)[NUM_CHANNELS];
-
-   const struct lp_build_sampler_soa *sampler;
-
-   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
-   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
-   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
-   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
-
-   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
-    * set in the indirect_files field.
-    * The temps[] array above is unused then.
-    */
-   LLVMValueRef temps_array;
-
-   /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
-    * set in the indirect_files field.
-    * The outputs[] array above is unused then.
-    */
-   LLVMValueRef outputs_array;
-
-   /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
-    * set in the indirect_files field.
-    * The inputs[] array above is unused then.
-    */
-   LLVMValueRef inputs_array;
-
-   LLVMValueRef system_values_array;
-
-   const struct tgsi_shader_info *info;
-   /** bitmask indicating which register files are accessed indirectly */
-   unsigned indirect_files;
-
-   struct lp_build_mask_context *mask;
-   struct lp_exec_mask exec_mask;
-
-   struct tgsi_full_instruction *instructions;
-   uint max_instructions;
-};
-
 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
 {
    mask->bld = bld;
@@ -438,15 +348,15 @@ static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
  * \param index  which temporary register
  * \param chan  which channel of the temp register.
  */
-static LLVMValueRef
-get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
+LLVMValueRef
+lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
              unsigned index,
              unsigned chan)
 {
-   LLVMBuilderRef builder = bld->base.gallivm->builder;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    assert(chan < 4);
    if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
-      LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, index * 4 + chan);
+      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
       return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
    }
    else {
@@ -460,15 +370,15 @@ get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
  * \param index  which output register
  * \param chan  which channel of the output register.
  */
-static LLVMValueRef
-get_output_ptr(struct lp_build_tgsi_soa_context *bld,
+LLVMValueRef
+lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
                unsigned index,
                unsigned chan)
 {
-   LLVMBuilderRef builder = bld->base.gallivm->builder;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    assert(chan < 4);
    if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
-      LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm,
+      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
                                                  index * 4 + chan);
       return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
    }
@@ -487,15 +397,15 @@ build_gather(struct lp_build_tgsi_soa_context *bld,
              LLVMValueRef base_ptr,
              LLVMValueRef indexes)
 {
-   LLVMBuilderRef builder = bld->base.gallivm->builder;
-   LLVMValueRef res = bld->base.undef;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   LLVMValueRef res = bld->bld_base.base.undef;
    unsigned i;
 
    /*
     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
     */
-   for (i = 0; i < bld->base.type.length; i++) {
-      LLVMValueRef ii = lp_build_const_int32(bld->base.gallivm, i);
+   for (i = 0; i < bld->bld_base.base.type.length; i++) {
+      LLVMValueRef ii = lp_build_const_int32(bld->bld_base.base.gallivm, i);
       LLVMValueRef index = LLVMBuildExtractElement(builder,
                                                    indexes, ii, "");
       LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
@@ -520,7 +430,7 @@ emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                   struct lp_exec_mask *mask,
                   LLVMValueRef pred)
 {
-   struct gallivm_state *gallivm = bld->base.gallivm;
+   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
    unsigned i;
 
@@ -537,7 +447,7 @@ emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
    /*
     * Loop over elements of index_vec, store scalar value.
     */
-   for (i = 0; i < bld->base.type.length; i++) {
+   for (i = 0; i < bld->bld_base.base.type.length; i++) {
       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
       LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
@@ -573,7 +483,7 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                    unsigned reg_file, unsigned reg_index,
                    const struct tgsi_src_register *indirect_reg)
 {
-   LLVMBuilderRef builder = bld->base.gallivm->builder;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    struct lp_build_context *uint_bld = &bld->uint_bld;
    /* always use X component of address register */
    unsigned swizzle = indirect_reg->SwizzleX;
@@ -584,7 +494,7 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld,
 
    assert(bld->indirect_files & (1 << reg_file));
 
-   base = lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, reg_index);
+   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
 
    assert(swizzle < 4);
    rel = LLVMBuildLoad(builder,
@@ -598,9 +508,9 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld,
 
    index = lp_build_add(uint_bld, base, rel);
 
-   max_index = lp_build_const_int_vec(bld->base.gallivm,
+   max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                       uint_bld->type,
-                                      bld->info->file_max[reg_file]);
+                                      bld->bld_base.info->file_max[reg_file]);
 
    assert(!uint_bld->type.sign);
    index = lp_build_min(uint_bld, index, max_index);
@@ -608,176 +518,198 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld,
    return index;
 }
 
-
-/**
- * Register fetch.
- */
 static LLVMValueRef
-emit_fetch(
-   struct lp_build_tgsi_soa_context *bld,
-   const struct tgsi_full_instruction *inst,
-   unsigned src_op,
-   const unsigned chan_index )
+emit_fetch_constant(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_src_register * reg,
+   const unsigned swizzle)
 {
-   struct gallivm_state *gallivm = bld->base.gallivm;
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
    struct lp_build_context *uint_bld = &bld->uint_bld;
-   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
-   const unsigned swizzle =
-      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
-   LLVMValueRef res;
    LLVMValueRef indirect_index = NULL;
 
-   if (swizzle > 3) {
-      assert(0 && "invalid swizzle in emit_fetch()");
-      return bld->base.undef;
-   }
+   /* XXX: Handle fetching xyzw components as a vector */
+   assert(swizzle != ~0);
 
    if (reg->Register.Indirect) {
       indirect_index = get_indirect_index(bld,
                                           reg->Register.File,
                                           reg->Register.Index,
                                           &reg->Indirect);
-   } else {
-      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
    }
 
-   switch (reg->Register.File) {
-   case TGSI_FILE_CONSTANT:
-      if (reg->Register.Indirect) {
-         LLVMValueRef swizzle_vec =
-            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
-         LLVMValueRef index_vec;  /* index into the const buffer */
+   if (reg->Register.Indirect) {
+      LLVMValueRef swizzle_vec =
+         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
+      LLVMValueRef index_vec;  /* index into the const buffer */
 
-         /* index_vec = indirect_index * 4 + swizzle */
-         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
-         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
+      /* index_vec = indirect_index * 4 + swizzle */
+      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
+      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
 
-         /* Gather values from the constant buffer */
-         res = build_gather(bld, bld->consts_ptr, index_vec);
-      }
-      else {
-         LLVMValueRef index;  /* index into the const buffer */
-         LLVMValueRef scalar, scalar_ptr;
+      /* Gather values from the constant buffer */
+      return build_gather(bld, bld->consts_ptr, index_vec);
+   }
+   else {
+      LLVMValueRef index;  /* index into the const buffer */
+      LLVMValueRef scalar, scalar_ptr;
 
-         index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
+      index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
 
-         scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
+      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
                                    &index, 1, "");
-         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
 
-         res = lp_build_broadcast_scalar(&bld->base, scalar);
-      }
-      break;
+      return lp_build_broadcast_scalar(&bld->bld_base.base, scalar);
+   }
+}
 
-   case TGSI_FILE_IMMEDIATE:
-      res = bld->immediates[reg->Register.Index][swizzle];
-      assert(res);
-      break;
+static LLVMValueRef
+emit_fetch_immediate(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_src_register * reg,
+   const unsigned swizzle)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle];
+   assert(res);
+   return res;
+}
 
-   case TGSI_FILE_INPUT:
-      if (reg->Register.Indirect) {
-         LLVMValueRef swizzle_vec =
-            lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
-         LLVMValueRef length_vec =
-            lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
-         LLVMValueRef index_vec;  /* index into the const buffer */
-         LLVMValueRef inputs_array;
-         LLVMTypeRef float4_ptr_type;
+static LLVMValueRef
+emit_fetch_input(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_src_register * reg,
+   const unsigned swizzle)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *uint_bld = &bld->uint_bld;
+   LLVMValueRef indirect_index = NULL;
+   LLVMValueRef res;
 
-         /* index_vec = (indirect_index * 4 + swizzle) * length */
-         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
-         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
-         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
+   if (reg->Register.Indirect) {
+      indirect_index = get_indirect_index(bld,
+                                          reg->Register.File,
+                                          reg->Register.Index,
+                                          &reg->Indirect);
+   }
 
-         /* cast inputs_array pointer to float* */
-         float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
-         inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
+   if (reg->Register.Indirect) {
+      LLVMValueRef swizzle_vec =
+         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
+      LLVMValueRef length_vec =
+         lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
+      LLVMValueRef index_vec;  /* indexes into the input registers */
+      LLVMValueRef inputs_array;
+      LLVMTypeRef float4_ptr_type;
+
+      /* index_vec = (indirect_index * 4 + swizzle) * length */
+      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
+      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
+      index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
+
+      /* cast inputs_array pointer to float* */
+      float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
+      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
                                          float4_ptr_type, "");
 
-         /* Gather values from the temporary register array */
-         res = build_gather(bld, inputs_array, index_vec);
-      } else {
-         if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
-            LLVMValueRef lindex = lp_build_const_int32(gallivm,
-                                           reg->Register.Index * 4 + swizzle);
-            LLVMValueRef input_ptr =  LLVMBuildGEP(builder,
-                                                   bld->inputs_array, &lindex, 1, "");
-            res = LLVMBuildLoad(builder, input_ptr, "");
-         }
-         else {
-            res = bld->inputs[reg->Register.Index][swizzle];
-         }
-      }
-      assert(res);
-      break;
-
-   case TGSI_FILE_TEMPORARY:
-      if (reg->Register.Indirect) {
-         LLVMValueRef swizzle_vec =
-            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
-         LLVMValueRef length_vec =
-            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type,
-                                   bld->base.type.length);
-         LLVMValueRef index_vec;  /* index into the const buffer */
-         LLVMValueRef temps_array;
-         LLVMTypeRef float4_ptr_type;
-
-         /* index_vec = (indirect_index * 4 + swizzle) * length */
-         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
-         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
-         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
-
-         /* cast temps_array pointer to float* */
-         float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->base.gallivm->context), 0);
-         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
-                                        float4_ptr_type, "");
-
-         /* Gather values from the temporary register array */
-         res = build_gather(bld, temps_array, index_vec);
+      /* Gather values from the input register array */
+      res = build_gather(bld, inputs_array, index_vec);
+   } else {
+      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
+         LLVMValueRef lindex = lp_build_const_int32(gallivm,
+                                        reg->Register.Index * 4 + swizzle);
+         LLVMValueRef input_ptr =  LLVMBuildGEP(builder,
+                                                bld->inputs_array, &lindex, 1, "");
+         res = LLVMBuildLoad(builder, input_ptr, "");
       }
       else {
-         LLVMValueRef temp_ptr;
-         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
-         res = LLVMBuildLoad(builder, temp_ptr, "");
-         if (!res)
-            return bld->base.undef;
+         res = bld->inputs[reg->Register.Index][swizzle];
       }
-      break;
-
-   case TGSI_FILE_SYSTEM_VALUE:
-      assert(!reg->Register.Indirect);
-      {
-         LLVMValueRef index;  /* index into the system value array */
-         LLVMValueRef scalar, scalar_ptr;
-
-         index = lp_build_const_int32(gallivm,
-                                      reg->Register.Index * 4 + swizzle);
-
-         scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array,
-                                   &index, 1, "");
-         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+   }
+   assert(res);
+   return res;
+}
 
-         res = lp_build_broadcast_scalar(&bld->base, scalar);
-      }
-      break;
+static LLVMValueRef
+emit_fetch_temporary(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_src_register * reg,
+   const unsigned swizzle)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *uint_bld = &bld->uint_bld;
+   LLVMValueRef indirect_index = NULL;
+   LLVMValueRef res;
 
-   default:
-      assert(0 && "invalid src register in emit_fetch()");
-      return bld->base.undef;
+   if (reg->Register.Indirect) {
+      indirect_index = get_indirect_index(bld,
+                                          reg->Register.File,
+                                          reg->Register.Index,
+                                          &reg->Indirect);
    }
 
-   if (reg->Register.Absolute) {
-      res = lp_build_abs( &bld->base, res );
+   if (reg->Register.Indirect) {
+      LLVMValueRef swizzle_vec =
+         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
+      LLVMValueRef length_vec =
+         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
+                                bld->bld_base.base.type.length);
+      LLVMValueRef index_vec;  /* indexes into the temp registers */
+      LLVMValueRef temps_array;
+      LLVMTypeRef float4_ptr_type;
+
+      /* index_vec = (indirect_index * 4 + swizzle) * length */
+      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
+      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
+      index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
+
+      /* cast temps_array pointer to float* */
+      float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
+      temps_array = LLVMBuildBitCast(builder, bld->temps_array,
+                                     float4_ptr_type, "");
+
+      /* Gather values from the temporary register array */
+      res = build_gather(bld, temps_array, index_vec);
    }
-
-   if (reg->Register.Negate) {
-      res = lp_build_negate( &bld->base, res );
+   else {
+      LLVMValueRef temp_ptr;
+      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
+      res = LLVMBuildLoad(builder, temp_ptr, "");
+      if (!res)
+         return bld->bld_base.base.undef;
    }
 
    return res;
 }
 
+static LLVMValueRef
+emit_fetch_system_value(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_src_register * reg,
+   const unsigned swizzle)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef index;  /* index into the system value array */
+   LLVMValueRef scalar, scalar_ptr;
+
+   assert(!reg->Register.Indirect);
+
+   index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
+
+   scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array, &index, 1, "");
+   scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+
+   return lp_build_broadcast_scalar(&bld->bld_base.base, scalar);
+}
 
 /**
  * Register fetch with derivatives.
@@ -785,27 +717,21 @@ emit_fetch(
 static void
 emit_fetch_deriv(
    struct lp_build_tgsi_soa_context *bld,
-   const struct tgsi_full_instruction *inst,
-   unsigned index,
-   const unsigned chan_index,
+   LLVMValueRef src,
    LLVMValueRef *res,
    LLVMValueRef *ddx,
    LLVMValueRef *ddy)
 {
-   LLVMValueRef src;
-
-   src = emit_fetch(bld, inst, index, chan_index);
-
    if(res)
       *res = src;
 
    /* TODO: use interpolation coeffs for inputs */
 
    if(ddx)
-      *ddx = lp_build_ddx(&bld->base, src);
+      *ddx = lp_build_ddx(&bld->bld_base.base, src);
 
    if(ddy)
-      *ddy = lp_build_ddy(&bld->base, src);
+      *ddy = lp_build_ddy(&bld->bld_base.base, src);
 }
 
 
@@ -818,7 +744,7 @@ emit_fetch_predicate(
    const struct tgsi_full_instruction *inst,
    LLVMValueRef *pred)
 {
-   LLVMBuilderRef builder = bld->base.gallivm->builder;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    unsigned index;
    unsigned char swizzles[4];
    LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
@@ -858,11 +784,11 @@ emit_fetch_predicate(
           * is needlessly causing two comparisons due to storing the intermediate
           * result as float vector instead of an integer mask vector.
           */
-         value = lp_build_compare(bld->base.gallivm,
-                                  bld->base.type,
+         value = lp_build_compare(bld->bld_base.base.gallivm,
+                                  bld->bld_base.base.type,
                                   PIPE_FUNC_NOTEQUAL,
                                   value,
-                                  bld->base.zero);
+                                  bld->bld_base.base.zero);
          if (inst->Predicate.Negate) {
             value = LLVMBuildNot(builder, value, "");
          }
@@ -881,15 +807,16 @@ emit_fetch_predicate(
  * Register store.
  */
 static void
-emit_store(
-   struct lp_build_tgsi_soa_context *bld,
+emit_store_chan(
+   struct lp_build_tgsi_context *bld_base,
    const struct tgsi_full_instruction *inst,
    unsigned index,
    unsigned chan_index,
    LLVMValueRef pred,
    LLVMValueRef value)
 {
-   struct gallivm_state *gallivm = bld->base.gallivm;
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
    struct lp_build_context *uint_bld = &bld->uint_bld;
@@ -900,13 +827,13 @@ emit_store(
       break;
 
    case TGSI_SAT_ZERO_ONE:
-      value = lp_build_max(&bld->base, value, bld->base.zero);
-      value = lp_build_min(&bld->base, value, bld->base.one);
+      value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
+      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
       break;
 
    case TGSI_SAT_MINUS_PLUS_ONE:
-      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
-      value = lp_build_min(&bld->base, value, bld->base.one);
+      value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
+      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
       break;
 
    default:
@@ -919,7 +846,8 @@ emit_store(
                                           reg->Register.Index,
                                           &reg->Indirect);
    } else {
-      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
+      assert(reg->Register.Index <=
+                             bld->bld_base.info->file_max[reg->Register.File]);
    }
 
    switch( reg->Register.File ) {
@@ -928,7 +856,7 @@ emit_store(
          LLVMValueRef chan_vec =
             lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
          LLVMValueRef length_vec =
-            lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
+            lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
          LLVMValueRef index_vec;  /* indexes into the temp registers */
          LLVMValueRef outputs_array;
          LLVMValueRef pixel_offsets;
@@ -937,7 +865,7 @@ emit_store(
 
          /* build pixel offset vector: {0, 1, 2, 3, ...} */
          pixel_offsets = uint_bld->undef;
-         for (i = 0; i < bld->base.type.length; i++) {
+         for (i = 0; i < bld->bld_base.base.type.length; i++) {
             LLVMValueRef ii = lp_build_const_int32(gallivm, i);
             pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
                                                    ii, ii, "");
@@ -959,7 +887,7 @@ emit_store(
                            &bld->exec_mask, pred);
       }
       else {
-         LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index,
+         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
                                                chan_index);
          lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
       }
@@ -971,7 +899,7 @@ emit_store(
             lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
          LLVMValueRef length_vec =
             lp_build_const_int_vec(gallivm, uint_bld->type,
-                                   bld->base.type.length);
+                                   bld->bld_base.base.type.length);
          LLVMValueRef index_vec;  /* indexes into the temp registers */
          LLVMValueRef temps_array;
          LLVMValueRef pixel_offsets;
@@ -980,7 +908,7 @@ emit_store(
 
          /* build pixel offset vector: {0, 1, 2, 3, ...} */
          pixel_offsets = uint_bld->undef; 
-         for (i = 0; i < bld->base.type.length; i++) {
+         for (i = 0; i < bld->bld_base.base.type.length; i++) {
             LLVMValueRef ii = lp_build_const_int32(gallivm, i);
             pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
                                                    ii, ii, "");
@@ -1002,7 +930,7 @@ emit_store(
                            &bld->exec_mask, pred);
       }
       else {
-         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
+         LLVMValueRef temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
                                               chan_index);
          lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
       }
@@ -1023,6 +951,27 @@ emit_store(
    }
 }
 
+static void
+emit_store(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_instruction * inst,
+   const struct tgsi_opcode_info * info,
+   LLVMValueRef dst[4])
+
+{
+   unsigned chan_index;
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+   if(info->num_dst) {
+      LLVMValueRef pred[TGSI_NUM_CHANNELS];
+
+      emit_fetch_predicate( bld, inst, pred );
+
+      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+         emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
+      }
+   }
+}
 
 /**
  * High-level instruction translators.
@@ -1034,7 +983,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
           enum lp_build_tex_modifier modifier,
           LLVMValueRef *texel)
 {
-   LLVMBuilderRef builder = bld->base.gallivm->builder;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    unsigned unit;
    LLVMValueRef lod_bias, explicit_lod;
    LLVMValueRef oow = NULL;
@@ -1047,7 +996,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
    if (!bld->sampler) {
       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
       for (i = 0; i < 4; i++) {
-         texel[i] = bld->base.undef;
+         texel[i] = bld->bld_base.base.undef;
       }
       return;
    }
@@ -1079,12 +1028,12 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
    }
 
    if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
-      lod_bias = emit_fetch( bld, inst, 0, 3 );
+      lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
       explicit_lod = NULL;
    }
    else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
       lod_bias = NULL;
-      explicit_lod = emit_fetch( bld, inst, 0, 3 );
+      explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
    }
    else {
       lod_bias = NULL;
@@ -1092,43 +1041,43 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
    }
 
    if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
-      oow = emit_fetch( bld, inst, 0, 3 );
-      oow = lp_build_rcp(&bld->base, oow);
+      oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
+      oow = lp_build_rcp(&bld->bld_base.base, oow);
    }
 
    for (i = 0; i < num_coords; i++) {
-      coords[i] = emit_fetch( bld, inst, 0, i );
+      coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
-         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
+         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
    }
    for (i = num_coords; i < 3; i++) {
-      coords[i] = bld->base.undef;
+      coords[i] = bld->bld_base.base.undef;
    }
 
    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
-      LLVMValueRef index0 = lp_build_const_int32(bld->base.gallivm, 0);
+      LLVMValueRef index0 = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
       for (i = 0; i < num_coords; i++) {
-         LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
-         LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
+         LLVMValueRef src1 = lp_build_emit_fetch( &bld->bld_base, inst, 1, i );
+         LLVMValueRef src2 = lp_build_emit_fetch( &bld->bld_base, inst, 2, i );
          ddx[i] = LLVMBuildExtractElement(builder, src1, index0, "");
          ddy[i] = LLVMBuildExtractElement(builder, src2, index0, "");
       }
       unit = inst->Src[3].Register.Index;
    }  else {
       for (i = 0; i < num_coords; i++) {
-         ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
-         ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
+         ddx[i] = lp_build_scalar_ddx( &bld->bld_base.base, coords[i] );
+         ddy[i] = lp_build_scalar_ddy( &bld->bld_base.base, coords[i] );
       }
       unit = inst->Src[1].Register.Index;
    }
    for (i = num_coords; i < 3; i++) {
-      ddx[i] = LLVMGetUndef(bld->base.elem_type);
-      ddy[i] = LLVMGetUndef(bld->base.elem_type);
+      ddx[i] = LLVMGetUndef(bld->bld_base.base.elem_type);
+      ddy[i] = LLVMGetUndef(bld->bld_base.base.elem_type);
    }
 
    bld->sampler->emit_fetch_texel(bld->sampler,
-                                  bld->base.gallivm,
-                                  bld->base.type,
+                                  bld->bld_base.base.gallivm,
+                                  bld->bld_base.base.type,
                                   unit, num_coords, coords,
                                   ddx, ddy,
                                   lod_bias, explicit_lod,
@@ -1144,10 +1093,10 @@ near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
    for (i = 0; i < 5; i++) {
       unsigned opcode;
 
-      if (pc + i >= bld->info->num_instructions)
+      if (pc + i >= bld->bld_base.info->num_instructions)
         return TRUE;
 
-      opcode = bld->instructions[pc + i].Instruction.Opcode;
+      opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
 
       if (opcode == TGSI_OPCODE_END)
         return TRUE;
@@ -1182,9 +1131,9 @@ emit_kil(
    const struct tgsi_full_instruction *inst,
    int pc)
 {
-   LLVMBuilderRef builder = bld->base.gallivm->builder;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    const struct tgsi_full_src_register *reg = &inst->Src[0];
-   LLVMValueRef terms[NUM_CHANNELS];
+   LLVMValueRef terms[TGSI_NUM_CHANNELS];
    LLVMValueRef mask;
    unsigned chan_index;
 
@@ -1197,10 +1146,10 @@ emit_kil(
       swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
 
       /* Check if the component has not been already tested. */
-      assert(swizzle < NUM_CHANNELS);
+      assert(swizzle < TGSI_NUM_CHANNELS);
       if( !terms[swizzle] )
          /* TODO: change the comparison operator instead of setting the sign */
-         terms[swizzle] =  emit_fetch(bld, inst, 0, chan_index );
+         terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
    }
 
    mask = NULL;
@@ -1211,7 +1160,7 @@ emit_kil(
          /*
           * If term < 0 then mask = 0 else mask = ~0.
           */
-         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
+         chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
 
          if(mask)
             mask = LLVMBuildAnd(builder, mask, chan_mask, "");
@@ -1237,10 +1186,9 @@ emit_kil(
  */
 static void
 emit_kilp(struct lp_build_tgsi_soa_context *bld,
-          const struct tgsi_full_instruction *inst,
-         int pc)
+          int pc)
 {
-   LLVMBuilderRef builder = bld->base.gallivm->builder;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    LLVMValueRef mask;
 
    /* For those channels which are "alive", disable fragment shader
@@ -1250,7 +1198,7 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld,
       mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
    }
    else {
-      LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
+      LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
       mask = zero;
    }
 
@@ -1268,7 +1216,7 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld,
 static void
 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
 {
-   struct gallivm_state *gallivm = bld->base.gallivm;
+   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef temp_ptr;
    LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
@@ -1276,7 +1224,7 @@ emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
    LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
    LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
    int index;
-   int n = bld->info->file_max[TGSI_FILE_TEMPORARY];
+   int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
 
    for (index = 0; index < n; index++) {
       LLVMValueRef idx = lp_build_const_int32(gallivm, index);
@@ -1286,7 +1234,7 @@ emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
       lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
 
       for (chan = 0; chan < 4; chan++) {
-         temp_ptr = get_temp_ptr(bld, index, chan);
+         temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
          res = LLVMBuildLoad(builder, temp_ptr, "");
          v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
          v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
@@ -1307,31 +1255,32 @@ emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
 
 
 
-static void
-emit_declaration(
-   struct lp_build_tgsi_soa_context *bld,
+void
+lp_emit_declaration_soa(
+   struct lp_build_tgsi_context *bld_base,
    const struct tgsi_full_declaration *decl)
 {
-   struct gallivm_state *gallivm = bld->base.gallivm;
-   LLVMTypeRef vec_type = bld->base.vec_type;
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
    const unsigned first = decl->Range.First;
    const unsigned last = decl->Range.Last;
    unsigned idx, i;
 
    for (idx = first; idx <= last; ++idx) {
-      assert(last <= bld->info->file_max[decl->Declaration.File]);
+      assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
       switch (decl->Declaration.File) {
       case TGSI_FILE_TEMPORARY:
          assert(idx < LP_MAX_TGSI_TEMPS);
          if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
-            for (i = 0; i < NUM_CHANNELS; i++)
+            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
                bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
          }
          break;
 
       case TGSI_FILE_OUTPUT:
          if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
-            for (i = 0; i < NUM_CHANNELS; i++)
+            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
                bld->outputs[idx][i] = lp_build_alloca(gallivm,
                                                       vec_type, "output");
          }
@@ -1339,13 +1288,13 @@ emit_declaration(
 
       case TGSI_FILE_ADDRESS:
          assert(idx < LP_MAX_TGSI_ADDRS);
-         for (i = 0; i < NUM_CHANNELS; i++)
+         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
             bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type, "addr");
          break;
 
       case TGSI_FILE_PREDICATE:
          assert(idx < LP_MAX_TGSI_PREDS);
-         for (i = 0; i < NUM_CHANNELS; i++)
+         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
             bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
                                                  "predicate");
          break;
@@ -1358,965 +1307,427 @@ emit_declaration(
 }
 
 
-/**
- * Emit LLVM for one TGSI instruction.
- * \param return TRUE for success, FALSE otherwise
- */
-static boolean
-emit_instruction(
-   struct lp_build_tgsi_soa_context *bld,
-   const struct tgsi_full_instruction *inst,
-   const struct tgsi_opcode_info *info,
-   int *pc)
+void lp_emit_immediate_soa(
+   struct lp_build_tgsi_context *bld_base,
+   const struct tgsi_full_immediate *imm)
 {
-   unsigned chan_index;
-   LLVMValueRef src0, src1, src2;
-   LLVMValueRef tmp0, tmp1, tmp2;
-   LLVMValueRef tmp3 = NULL;
-   LLVMValueRef tmp4 = NULL;
-   LLVMValueRef tmp5 = NULL;
-   LLVMValueRef tmp6 = NULL;
-   LLVMValueRef tmp7 = NULL;
-   LLVMValueRef res;
-   LLVMValueRef dst0[NUM_CHANNELS];
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   struct gallivm_state * gallivm = bld_base->base.gallivm;
 
-   /*
-    * Stores and write masks are handled in a general fashion after the long
-    * instruction opcode switch statement.
-    *
-    * Although not stricitly necessary, we avoid generating instructions for
-    * channels which won't be stored, in cases where's that easy. For some
-    * complex instructions, like texture sampling, it is more convenient to
-    * assume a full writemask and then let LLVM optimization passes eliminate
-    * redundant code.
-    */
+   /* simply copy the immediate values into the next immediates[] slot */
+   unsigned i;
+   const uint size = imm->Immediate.NrTokens - 1;
+   assert(size <= 4);
+   assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
 
-   (*pc)++;
+   for( i = 0; i < size; ++i )
+      bld->immediates[bld->num_immediates][i] =
+              lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
 
-   assert(info->num_dst <= 1);
-   if (info->num_dst) {
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = bld->base.undef;
-      }
-   }
+   for( i = size; i < 4; ++i )
+      bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
 
-   switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_ARL:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         tmp0 = lp_build_floor(&bld->base, tmp0);
-         dst0[chan_index] = tmp0;
-      }
-      break;
+   bld->num_immediates++;
+}
 
-   case TGSI_OPCODE_MOV:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
-      }
-      break;
+static void
+ddx_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_LIT:
-      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ) {
-         dst0[TGSI_CHAN_X] = bld->base.one;
-      }
-      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ) {
-         src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-         dst0[TGSI_CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
-      }
-      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
-         /* XMM[1] = SrcReg[0].yyyy */
-         tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
-         /* XMM[1] = max(XMM[1], 0) */
-         tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
-         /* XMM[2] = SrcReg[0].wwww */
-         tmp2 = emit_fetch( bld, inst, 0, TGSI_CHAN_W );
-         tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
-         tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-         tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
-         dst0[TGSI_CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
-      }
-      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) ) {
-         dst0[TGSI_CHAN_W] = bld->base.one;
-      }
-      break;
+   emit_fetch_deriv(bld, emit_data->args[0], NULL,
+                    &emit_data->output[emit_data->chan], NULL);
+}
 
-   case TGSI_OPCODE_RCP:
-   /* TGSI_OPCODE_RECIP */
-      src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-      res = lp_build_rcp(&bld->base, src0);
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = res;
-      }
-      break;
+static void
+ddy_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_RSQ:
-   /* TGSI_OPCODE_RECIPSQRT */
-      src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-      src0 = lp_build_abs(&bld->base, src0);
-      res = lp_build_rsqrt(&bld->base, src0);
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = res;
-      }
-      break;
+   emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
+                    &emit_data->output[emit_data->chan]);
+}
 
-   case TGSI_OPCODE_EXP:
-      if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
-         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
-         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) {
-         LLVMValueRef *p_exp2_int_part = NULL;
-         LLVMValueRef *p_frac_part = NULL;
-         LLVMValueRef *p_exp2 = NULL;
-
-         src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-
-         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
-            p_exp2_int_part = &tmp0;
-         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
-            p_frac_part = &tmp1;
-         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
-            p_exp2 = &tmp2;
-
-         lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
-
-         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
-            dst0[TGSI_CHAN_X] = tmp0;
-         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
-            dst0[TGSI_CHAN_Y] = tmp1;
-         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
-            dst0[TGSI_CHAN_Z] = tmp2;
-      }
-      /* dst.w = 1.0 */
-      if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W )) {
-         dst0[TGSI_CHAN_W] = bld->base.one;
-      }
-      break;
+static void
+kilp_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_LOG:
-      if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
-         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
-         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) {
-         LLVMValueRef *p_floor_log2 = NULL;
-         LLVMValueRef *p_exp = NULL;
-         LLVMValueRef *p_log2 = NULL;
-
-         src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-         src0 = lp_build_abs( &bld->base, src0 );
-
-         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
-            p_floor_log2 = &tmp0;
-         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
-            p_exp = &tmp1;
-         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
-            p_log2 = &tmp2;
-
-         lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
-
-         /* dst.x = floor(lg2(abs(src.x))) */
-         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
-            dst0[TGSI_CHAN_X] = tmp0;
-         /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
-         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y )) {
-            dst0[TGSI_CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
-         }
-         /* dst.z = lg2(abs(src.x)) */
-         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
-            dst0[TGSI_CHAN_Z] = tmp2;
-      }
-      /* dst.w = 1.0 */
-      if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W )) {
-         dst0[TGSI_CHAN_W] = bld->base.one;
-      }
-      break;
+   emit_kilp(bld, bld_base->pc - 1);
+}
 
-   case TGSI_OPCODE_MUL:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         src0 = emit_fetch( bld, inst, 0, chan_index );
-         src1 = emit_fetch( bld, inst, 1, chan_index );
-         dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
-      }
-      break;
+static void
+kil_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_ADD:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         src0 = emit_fetch( bld, inst, 0, chan_index );
-         src1 = emit_fetch( bld, inst, 1, chan_index );
-         dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
-      }
-      break;
+   emit_kil(bld, emit_data->inst, bld_base->pc - 1);
+}
 
-   case TGSI_OPCODE_DP3:
-   /* TGSI_OPCODE_DOT3 */
-      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
-      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
-      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
-      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
-      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
-      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
-      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
-      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
-      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
-      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = tmp0;
-      }
-      break;
+static void
+tex_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_DP4:
-   /* TGSI_OPCODE_DOT4 */
-      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
-      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
-      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
-      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
-      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
-      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
-      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
-      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
-      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
-      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
-      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_W );
-      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_W );
-      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
-      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = tmp0;
-      }
-      break;
+   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
+}
 
-   case TGSI_OPCODE_DST:
-      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
-         dst0[TGSI_CHAN_X] = bld->base.one;
-      }
-      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
-         tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
-         tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
-         dst0[TGSI_CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
-      }
-      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
-         dst0[TGSI_CHAN_Z] = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
-      }
-      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
-         dst0[TGSI_CHAN_W] = emit_fetch( bld, inst, 1, TGSI_CHAN_W );
-      }
-      break;
+static void
+txb_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_MIN:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         src0 = emit_fetch( bld, inst, 0, chan_index );
-         src1 = emit_fetch( bld, inst, 1, chan_index );
-         dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
-      }
-      break;
+   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
+            emit_data->output);
+}
 
-   case TGSI_OPCODE_MAX:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         src0 = emit_fetch( bld, inst, 0, chan_index );
-         src1 = emit_fetch( bld, inst, 1, chan_index );
-         dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
-      }
-      break;
+static void
+txd_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_SLT:
-   /* TGSI_OPCODE_SETLT */
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         src0 = emit_fetch( bld, inst, 0, chan_index );
-         src1 = emit_fetch( bld, inst, 1, chan_index );
-         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
-         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
-      }
-      break;
+   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
+            emit_data->output);
+}
 
-   case TGSI_OPCODE_SGE:
-   /* TGSI_OPCODE_SETGE */
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         src0 = emit_fetch( bld, inst, 0, chan_index );
-         src1 = emit_fetch( bld, inst, 1, chan_index );
-         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
-         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
-      }
-      break;
+static void
+txl_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_MAD:
-   /* TGSI_OPCODE_MADD */
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         tmp1 = emit_fetch( bld, inst, 1, chan_index );
-         tmp2 = emit_fetch( bld, inst, 2, chan_index );
-         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
-         tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
-         dst0[chan_index] = tmp0;
-      }
-      break;
+   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
+            emit_data->output);
+}
 
-   case TGSI_OPCODE_SUB:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         tmp1 = emit_fetch( bld, inst, 1, chan_index );
-         dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
-      }
-      break;
+static void
+txp_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_LRP:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         src0 = emit_fetch( bld, inst, 0, chan_index );
-         src1 = emit_fetch( bld, inst, 1, chan_index );
-         src2 = emit_fetch( bld, inst, 2, chan_index );
-         tmp0 = lp_build_sub( &bld->base, src1, src2 );
-         tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
-         dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
-      }
-      break;
+   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
+            emit_data->output);
+}
 
-   case TGSI_OPCODE_CND:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         src0 = emit_fetch( bld, inst, 0, chan_index );
-         src1 = emit_fetch( bld, inst, 1, chan_index );
-         src2 = emit_fetch( bld, inst, 2, chan_index );
-         tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5);
-         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
-         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
-      }
-      break;
-
-   case TGSI_OPCODE_DP2A:
-      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );  /* xmm0 = src[0].x */
-      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );  /* xmm1 = src[1].x */
-      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
-      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );  /* xmm1 = src[0].y */
-      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );  /* xmm2 = src[1].y */
-      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
-      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
-      tmp1 = emit_fetch( bld, inst, 2, TGSI_CHAN_X );  /* xmm1 = src[2].x */
-      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
-      }
-      break;
+static void
+cal_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_FRC:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         src0 = emit_fetch( bld, inst, 0, chan_index );
-         tmp0 = lp_build_floor(&bld->base, src0);
-         tmp0 = lp_build_sub(&bld->base, src0, tmp0);
-         dst0[chan_index] = tmp0;
-      }
-      break;
+   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
+                     &bld_base->pc);
+}
 
-   case TGSI_OPCODE_CLAMP:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         src1 = emit_fetch( bld, inst, 1, chan_index );
-         src2 = emit_fetch( bld, inst, 2, chan_index );
-         tmp0 = lp_build_max(&bld->base, tmp0, src1);
-         tmp0 = lp_build_min(&bld->base, tmp0, src2);
-         dst0[chan_index] = tmp0;
-      }
-      break;
+static void
+ret_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_FLR:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
-      }
-      break;
+   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
+}
 
-   case TGSI_OPCODE_ROUND:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         dst0[chan_index] = lp_build_round(&bld->base, tmp0);
-      }
-      break;
+static void
+brk_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_EX2: {
-      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-      tmp0 = lp_build_exp2( &bld->base, tmp0);
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = tmp0;
-      }
-      break;
-   }
+   lp_exec_break(&bld->exec_mask);
+}
 
-   case TGSI_OPCODE_LG2:
-      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-      tmp0 = lp_build_log2( &bld->base, tmp0);
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = tmp0;
-      }
-      break;
+static void
+if_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{ /* Emit callback installed by lp_build_tgsi_soa() as
+   * op_actions[TGSI_OPCODE_IF].emit: builds the comparison "args[0] != 0" and
+   * pushes the result as a new condition on the SoA exec mask.  action is not
+   * used.
+   * NOTE(review): assumes the fetch step has already placed the condition
+   * operand in emit_data->args[0] - confirm against the fetch_args hook. */
+   LLVMValueRef tmp;
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_POW:
-      src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-      src1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
-      res = lp_build_pow( &bld->base, src0, src1 );
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = res;
-      }
-      break;
+   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
+                      emit_data->args[0], bld->bld_base.base.zero);
+   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
+}
 
-   case TGSI_OPCODE_XPD:
-      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
-         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ) {
-         tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
-         tmp3 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
-      }
-      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
-         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
-         tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
-         tmp4 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
-      }
-      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
-         tmp2 = tmp0;
-         tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
-         tmp5 = tmp3;
-         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
-         tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
-         dst0[TGSI_CHAN_X] = tmp2;
-      }
-      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
-         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
-         tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
-         tmp5 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-      }
-      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
-         tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
-         tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
-         tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
-         dst0[TGSI_CHAN_Y] = tmp3;
-      }
-      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
-         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
-         tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
-         tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
-         dst0[TGSI_CHAN_Z] = tmp5;
-      }
-      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
-         dst0[TGSI_CHAN_W] = bld->base.one;
-      }
-      break;
+static void
+bgnloop_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{ /* Emit callback installed by lp_build_tgsi_soa() as
+   * op_actions[TGSI_OPCODE_BGNLOOP].emit: forwards to lp_exec_bgnloop() on the
+   * SoA context's exec mask.  action and emit_data are not used. */
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_ABS:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
-      }
-      break;
+   lp_exec_bgnloop(&bld->exec_mask);
+}
 
-   case TGSI_OPCODE_RCC:
-      /* deprecated? */
-      assert(0);
-      return FALSE;
-
-   case TGSI_OPCODE_DPH:
-      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
-      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
-      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
-      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
-      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
-      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
-      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
-      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
-      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
-      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
-      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_W );
-      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = tmp0;
-      }
-      break;
+static void
+bgnsub_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{ /* Emit callback installed by lp_build_tgsi_soa() as
+   * op_actions[TGSI_OPCODE_BGNSUB].emit: forwards to lp_exec_mask_bgnsub() on
+   * the SoA context's exec mask.  action and emit_data are not used. */
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_COS:
-      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-      tmp0 = lp_build_cos( &bld->base, tmp0 );
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = tmp0;
-      }
-      break;
+   lp_exec_mask_bgnsub(&bld->exec_mask);
+}
 
-   case TGSI_OPCODE_DDX:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
-      }
-      break;
+static void
+else_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{ /* Emit callback installed by lp_build_tgsi_soa() as
+   * op_actions[TGSI_OPCODE_ELSE].emit: forwards to lp_exec_mask_cond_invert()
+   * on the SoA context's exec mask.  action and emit_data are not used. */
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_DDY:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
-      }
-      break;
+   lp_exec_mask_cond_invert(&bld->exec_mask);
+}
 
-   case TGSI_OPCODE_KILP:
-      /* predicated kill */
-      emit_kilp( bld, inst, (*pc)-1 );
-      break;
+static void
+endif_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{ /* Emit callback installed by lp_build_tgsi_soa() as
+   * op_actions[TGSI_OPCODE_ENDIF].emit: forwards to lp_exec_mask_cond_pop() on
+   * the SoA context's exec mask.  action and emit_data are not used. */
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_KIL:
-      /* conditional kill */
-      emit_kil( bld, inst, (*pc)-1 );
-      break;
+   lp_exec_mask_cond_pop(&bld->exec_mask);
+}
 
-   case TGSI_OPCODE_PK2H:
-      return FALSE;
-      break;
+static void
+endloop_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{ /* Emit callback installed by lp_build_tgsi_soa() as
+   * op_actions[TGSI_OPCODE_ENDLOOP].emit: forwards to lp_exec_endloop() with
+   * the context's gallivm state and the SoA exec mask.  action and emit_data
+   * are not used. */
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_PK2US:
-      return FALSE;
-      break;
+   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
+}
 
-   case TGSI_OPCODE_PK4B:
-      return FALSE;
-      break;
+static void
+endsub_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{ /* Emit callback installed by lp_build_tgsi_soa() as
+   * op_actions[TGSI_OPCODE_ENDSUB].emit: forwards the SoA exec mask and the
+   * address of the context's pc to lp_exec_mask_endsub().  action and
+   * emit_data are not used. */
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_PK4UB:
-      return FALSE;
-      break;
+   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc); /* pc is passed by address; presumably the callee rewrites it */
+}
 
-   case TGSI_OPCODE_RFL:
-      return FALSE;
-      break;
+static void
+cont_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{ /* Emit callback installed by lp_build_tgsi_soa() as
+   * op_actions[TGSI_OPCODE_CONT].emit: forwards to lp_exec_continue() on the
+   * SoA context's exec mask.  action and emit_data are not used. */
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_SEQ:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         src0 = emit_fetch( bld, inst, 0, chan_index );
-         src1 = emit_fetch( bld, inst, 1, chan_index );
-         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
-         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
-      }
-      break;
+   lp_exec_continue(&bld->exec_mask);
+}
 
-   case TGSI_OPCODE_SFL:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = bld->base.zero;
-      }
-      break;
+/**
+ * Emit callback for 3- or 4-component vector normalization.
+ *
+ * dims is 3 when the instruction opcode is TGSI_OPCODE_NRM and 4 for any
+ * other opcode routed to this callback.  The squares of the x, y, z (and, for
+ * dims == 4, w) channels of source 0 are summed, lp_build_rsqrt() is applied
+ * to the sum, and each enabled destination channel receives its source
+ * channel multiplied by that value in emit_data->output[].  For dims == 3 an
+ * enabled W channel is written with 1.0 instead.
+ *
+ * action is not used.  emit_data supplies the instruction and receives the
+ * per-channel results.
+ *
+ * XXX: Refactor and move it to lp_bld_tgsi_action.c
+ *
+ * XXX: What do the comments about xmm registers mean?  Maybe they are left over
+ * from old code, but there is no guarantee that LLVM will use those registers
+ * for this code.
+ *
+ * XXX: There should be no calls to lp_build_emit_fetch in this function.  This
+ * should be handled by the emit_data->fetch_args function. */
+static void
+nrm_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef tmp0, tmp1;
+   LLVMValueRef tmp4 = NULL;
+   LLVMValueRef tmp5 = NULL;
+   LLVMValueRef tmp6 = NULL;
+   LLVMValueRef tmp7 = NULL;
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   case TGSI_OPCODE_SGT:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         src0 = emit_fetch( bld, inst, 0, chan_index );
-         src1 = emit_fetch( bld, inst, 1, chan_index );
-         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
-         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
-      }
-      break;
+   uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
 
-   case TGSI_OPCODE_SIN:
-      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-      tmp0 = lp_build_sin( &bld->base, tmp0 );
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = tmp0;
-      }
-      break;
+   /* Skip the whole computation when no normalized channel is written. */
+   if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
+       TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
+       TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
+       (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
 
-   case TGSI_OPCODE_SLE:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         src0 = emit_fetch( bld, inst, 0, chan_index );
-         src1 = emit_fetch( bld, inst, 1, chan_index );
-         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
-         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
-      }
-      break;
+      /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
 
-   case TGSI_OPCODE_SNE:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         src0 = emit_fetch( bld, inst, 0, chan_index );
-         src1 = emit_fetch( bld, inst, 1, chan_index );
-         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
-         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
+      /* xmm4 = src.x */
+      /* xmm0 = src.x * src.x */
+      tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
+      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
+         tmp4 = tmp0;
       }
-      break;
+      tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
 
-   case TGSI_OPCODE_STR:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = bld->base.one;
+      /* xmm5 = src.y */
+      /* xmm0 = xmm0 + src.y * src.y */
+      tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
+      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
+         tmp5 = tmp1;
       }
-      break;
-
-   case TGSI_OPCODE_TEX:
-      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
-      break;
-
-   case TGSI_OPCODE_TXD:
-      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
-      break;
-
-   case TGSI_OPCODE_UP2H:
-      /* deprecated */
-      assert (0);
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_UP2US:
-      /* deprecated */
-      assert(0);
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_UP4B:
-      /* deprecated */
-      assert(0);
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_UP4UB:
-      /* deprecated */
-      assert(0);
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_X2D:
-      /* deprecated? */
-      assert(0);
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_ARA:
-      /* deprecated */
-      assert(0);
-      return FALSE;
-      break;
+      tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
+      tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
 
-   case TGSI_OPCODE_ARR:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         tmp0 = lp_build_round(&bld->base, tmp0);
-         dst0[chan_index] = tmp0;
+      /* xmm6 = src.z */
+      /* xmm0 = xmm0 + src.z * src.z */
+      tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
+      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
+         tmp6 = tmp1;
       }
-      break;
-
-   case TGSI_OPCODE_BRA:
-      /* deprecated */
-      assert(0);
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_CAL:
-      lp_exec_mask_call(&bld->exec_mask,
-                        inst->Label.Label,
-                        pc);
+      tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
+      tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
 
-      break;
-
-   case TGSI_OPCODE_RET:
-      lp_exec_mask_ret(&bld->exec_mask, pc);
-      break;
-
-   case TGSI_OPCODE_END:
-      if (0) {
-         /* for debugging */
-         emit_dump_temps(bld);
+      if (dims == 4) {
+         /* xmm7 = src.w */
+         /* xmm0 = xmm0 + src.w * src.w */
+         tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
+         if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
+            tmp7 = tmp1;
+         }
+         tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
+         tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
       }
-      *pc = -1;
-      break;
-
-   case TGSI_OPCODE_SSG:
-   /* TGSI_OPCODE_SGN */
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
+      /* xmm1 = 1 / sqrt(xmm0) */
+      tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
+      /* dst.x = xmm1 * src.x */
+      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
+         emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
       }
-      break;
-
-   case TGSI_OPCODE_CMP:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         src0 = emit_fetch( bld, inst, 0, chan_index );
-         src1 = emit_fetch( bld, inst, 1, chan_index );
-         src2 = emit_fetch( bld, inst, 2, chan_index );
-         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
-         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
+      /* dst.y = xmm1 * src.y */
+      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
+         emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
       }
-      break;
 
-   case TGSI_OPCODE_SCS:
-      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
-         tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-         dst0[TGSI_CHAN_X] = lp_build_cos( &bld->base, tmp0 );
-      }
-      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
-         tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
-         dst0[TGSI_CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
+      /* dst.z = xmm1 * src.z */
+      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
+         emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
       }
-      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
-         dst0[TGSI_CHAN_Z] = bld->base.zero;
+      /* dst.w = xmm1 * src.w
+       * The guard must test W: tmp7 is only captured when W is enabled, so
+       * testing X here would multiply a NULL value when X is written without
+       * W, and would leave dst.w unwritten when W is written without X. */
+      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4) {
+         emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
       }
-      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
-         dst0[TGSI_CHAN_W] = bld->base.one;
-      }
-      break;
-
-   case TGSI_OPCODE_TXB:
-      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
-      break;
-
-   case TGSI_OPCODE_NRM:
-      /* fall-through */
-   case TGSI_OPCODE_NRM4:
-      /* 3 or 4-component normalization */
-      {
-         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
-
-         if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) ||
-            TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y) ||
-            TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z) ||
-             (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W) && dims == 4)) {
-
-            /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
-
-            /* xmm4 = src.x */
-            /* xmm0 = src.x * src.x */
-            tmp0 = emit_fetch(bld, inst, 0, TGSI_CHAN_X);
-            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) {
-               tmp4 = tmp0;
-            }
-            tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
-
-            /* xmm5 = src.y */
-            /* xmm0 = xmm0 + src.y * src.y */
-            tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_Y);
-            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) {
-               tmp5 = tmp1;
-            }
-            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
-            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
-
-            /* xmm6 = src.z */
-            /* xmm0 = xmm0 + src.z * src.z */
-            tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_Z);
-            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) {
-               tmp6 = tmp1;
-            }
-            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
-            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
-
-            if (dims == 4) {
-               /* xmm7 = src.w */
-               /* xmm0 = xmm0 + src.w * src.w */
-               tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_W);
-               if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W)) {
-                  tmp7 = tmp1;
-               }
-               tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
-               tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
-            }
-
-            /* xmm1 = 1 / sqrt(xmm0) */
-            tmp1 = lp_build_rsqrt( &bld->base, tmp0);
-
-            /* dst.x = xmm1 * src.x */
-            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) {
-               dst0[TGSI_CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
-            }
-
-            /* dst.y = xmm1 * src.y */
-            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) {
-               dst0[TGSI_CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
-            }
-
-            /* dst.z = xmm1 * src.z */
-            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) {
-               dst0[TGSI_CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
-            }
-
-            /* dst.w = xmm1 * src.w */
-            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) && dims == 4) {
-               dst0[TGSI_CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
-            }
-         }
-
-         /* dst.w = 1.0 */
-         if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W) && dims == 3) {
-            dst0[TGSI_CHAN_W] = bld->base.one;
-         }
-      }
-      break;
-
-   case TGSI_OPCODE_DIV:
-      /* deprecated */
-      assert( 0 );
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_DP2:
-      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );  /* xmm0 = src[0].x */
-      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );  /* xmm1 = src[1].x */
-      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
-      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );  /* xmm1 = src[0].y */
-      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );  /* xmm2 = src[1].y */
-      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
-      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
-      }
-      break;
-
-   case TGSI_OPCODE_TXL:
-      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
-      break;
-
-   case TGSI_OPCODE_TXP:
-      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
-      break;
-
-   case TGSI_OPCODE_BRK:
-      lp_exec_break(&bld->exec_mask);
-      break;
-
-   case TGSI_OPCODE_IF:
-      tmp0 = emit_fetch(bld, inst, 0, TGSI_CHAN_X);
-      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
-                          tmp0, bld->base.zero);
-      lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
-      break;
-
-   case TGSI_OPCODE_BGNLOOP:
-      lp_exec_bgnloop(&bld->exec_mask);
-      break;
-
-   case TGSI_OPCODE_BGNSUB:
-      lp_exec_mask_bgnsub(&bld->exec_mask);
-      break;
-
-   case TGSI_OPCODE_ELSE:
-      lp_exec_mask_cond_invert(&bld->exec_mask);
-      break;
-
-   case TGSI_OPCODE_ENDIF:
-      lp_exec_mask_cond_pop(&bld->exec_mask);
-      break;
-
-   case TGSI_OPCODE_ENDLOOP:
-      lp_exec_endloop(bld->base.gallivm, &bld->exec_mask);
-      break;
+   }
 
-   case TGSI_OPCODE_ENDSUB:
-      lp_exec_mask_endsub(&bld->exec_mask, pc);
-      break;
+   /* dst.w = 1.0 */
+   if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
+      emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
+   }
+}
 
-   case TGSI_OPCODE_PUSHA:
-      /* deprecated? */
-      assert(0);
-      return FALSE;
-      break;
+static void emit_prologue(struct lp_build_tgsi_context * bld_base)
+{ /* Prologue hook installed by lp_build_tgsi_soa() as bld_base.emit_prologue.
+   * For each of the TEMPORARY, OUTPUT and INPUT register files whose bit is
+   * set in bld->indirect_files it allocates an array of base.vec_type values
+   * with lp_build_array_alloca(); for inputs it additionally stores the
+   * caller-provided input values into that array. */
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state * gallivm = bld_base->base.gallivm;
 
-   case TGSI_OPCODE_POPA:
-      /* deprecated? */
-      assert(0);
-      return FALSE;
-      break;
+   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
+      LLVMValueRef array_size =
+         lp_build_const_int32(gallivm,
+                         bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4); /* == (file_max + 1) * 4 elements */
+      bld->temps_array = lp_build_array_alloca(gallivm,
+                                              bld_base->base.vec_type, array_size,
+                                              "temp_array");
+   }
 
-   case TGSI_OPCODE_CEIL:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
-      }
-      break;
+   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
+      LLVMValueRef array_size =
+         lp_build_const_int32(gallivm,
+                            bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4); /* == (file_max + 1) * 4 elements */
+      bld->outputs_array = lp_build_array_alloca(gallivm,
+                                                bld_base->base.vec_type, array_size,
+                                                "output_array");
+   }
 
-   case TGSI_OPCODE_I2F:
-      /* deprecated? */
-      assert(0);
-      return FALSE;
-      break;
+   /* If we have indirect addressing in inputs we need to copy them into
+    * our alloca array to be able to iterate over them */
+   if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
+      unsigned index, chan;
+      LLVMTypeRef vec_type = bld_base->base.vec_type;
+      LLVMValueRef array_size = lp_build_const_int32(gallivm,
+            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4); /* == (file_max + 1) * 4 elements */
+      bld->inputs_array = lp_build_array_alloca(gallivm,
+                                               vec_type, array_size,
+                                               "input_array");
 
-   case TGSI_OPCODE_NOT:
-      /* deprecated? */
-      assert(0);
-      return FALSE;
-      break;
+      assert(bld_base->info->num_inputs
+                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1); /* every copied index must fit in the array sized above */
 
-   case TGSI_OPCODE_TRUNC:
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         tmp0 = emit_fetch( bld, inst, 0, chan_index );
-         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
+      for (index = 0; index < bld_base->info->num_inputs; ++index) {
+         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+            LLVMValueRef lindex =
+               lp_build_const_int32(gallivm, index * 4 + chan); /* flat slot: 4 elements per input index */
+            LLVMValueRef input_ptr =
+               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
+                            &lindex, 1, "");
+            LLVMValueRef value = bld->inputs[index][chan];
+            if (value) /* NULL entries are skipped, leaving that slot unwritten */
+               LLVMBuildStore(gallivm->builder, value, input_ptr);
+         }
       }
-      break;
-
-   case TGSI_OPCODE_SHL:
-      /* deprecated? */
-      assert(0);
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_ISHR:
-      /* deprecated? */
-      assert(0);
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_AND:
-      /* deprecated? */
-      assert(0);
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_OR:
-      /* deprecated? */
-      assert(0);
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_MOD:
-      /* deprecated? */
-      assert(0);
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_XOR:
-      /* deprecated? */
-      assert(0);
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_SAD:
-      /* deprecated? */
-      assert(0);
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_TXF:
-      /* deprecated? */
-      assert(0);
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_TXQ:
-      /* deprecated? */
-      assert(0);
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_CONT:
-      lp_exec_continue(&bld->exec_mask);
-      break;
-
-   case TGSI_OPCODE_EMIT:
-      return FALSE;
-      break;
-
-   case TGSI_OPCODE_ENDPRIM:
-      return FALSE;
-      break;
+   }
+}
 
-   case TGSI_OPCODE_NOP:
-      break;
+static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
+{ /* Epilogue hook installed by lp_build_tgsi_soa() as bld_base.emit_epilogue.
+   * When outputs are indirectly addressed it fills bld->outputs[index][chan]
+   * with the value returned by lp_get_output_ptr() for every output index and
+   * channel. */
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
 
-   default:
-      return FALSE;
+   if (0) {
+      /* for debugging */
+      emit_dump_temps(bld);
    }
-   
-   if(info->num_dst) {
-      LLVMValueRef pred[NUM_CHANNELS];
 
-      emit_fetch_predicate( bld, inst, pred );
-
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
+   /* If we have indirect addressing in outputs we need to copy our alloca array
+    * to the outputs slots specified by the caller */
+   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
+      unsigned index, chan;
+      assert(bld_base->info->num_outputs <=
+                        bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1); /* same bound emit_prologue used to size output_array */
+      for (index = 0; index < bld_base->info->num_outputs; ++index) {
+         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
+         }
       }
    }
-
-   return TRUE;
 }
 
-
 void
 lp_build_tgsi_soa(struct gallivm_state *gallivm,
                   const struct tgsi_token *tokens,
@@ -2325,17 +1736,12 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
                   LLVMValueRef consts_ptr,
                   LLVMValueRef system_values_array,
                   const LLVMValueRef *pos,
-                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
-                  LLVMValueRef (*outputs)[NUM_CHANNELS],
+                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
+                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                   struct lp_build_sampler_soa *sampler,
                   const struct tgsi_shader_info *info)
 {
    struct lp_build_tgsi_soa_context bld;
-   struct tgsi_parse_context parse;
-   uint num_immediates = 0;
-   uint num_instructions = 0;
-   unsigned i;
-   int pc = 0;
 
    struct lp_type res_type;
 
@@ -2347,7 +1753,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
 
    /* Setup build context */
    memset(&bld, 0, sizeof bld);
-   lp_build_context_init(&bld.base, gallivm, type);
+   lp_build_context_init(&bld.bld_base.base, gallivm, type);
    lp_build_context_init(&bld.uint_bld, gallivm, lp_uint_type(type));
    lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
    bld.mask = mask;
@@ -2356,145 +1762,55 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
    bld.outputs = outputs;
    bld.consts_ptr = consts_ptr;
    bld.sampler = sampler;
-   bld.info = info;
+   bld.bld_base.info = info;
    bld.indirect_files = info->indirect_files;
-   bld.instructions = (struct tgsi_full_instruction *)
-                      MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
-   bld.max_instructions = LP_MAX_INSTRUCTIONS;
 
-   if (!bld.instructions) {
-      return;
-   }
+   bld.bld_base.soa = TRUE;
+   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
+   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
+   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
+   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
+   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
+   bld.bld_base.emit_store = emit_store;
+
+   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
+   bld.bld_base.emit_immediate = lp_emit_immediate_soa;
+
+   bld.bld_base.emit_prologue = emit_prologue;
+   bld.bld_base.emit_epilogue = emit_epilogue;
+
+   /* Set opcode actions */
+   lp_set_default_actions_cpu(&bld.bld_base);
+
+   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
+
+   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
 
-   lp_exec_mask_init(&bld.exec_mask, &bld.base);
-
-   if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
-      LLVMValueRef array_size =
-         lp_build_const_int32(gallivm,
-                              info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
-      bld.temps_array = lp_build_array_alloca(gallivm,
-                                              bld.base.vec_type, array_size,
-                                              "temp_array");
-   }
-
-   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
-      LLVMValueRef array_size =
-         lp_build_const_int32(gallivm,
-                              info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
-      bld.outputs_array = lp_build_array_alloca(gallivm,
-                                                bld.base.vec_type, array_size,
-                                                "output_array");
-   }
-
-   /* If we have indirect addressing in inputs we need to copy them into
-    * our alloca array to be able to iterate over them */
-   if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) {
-      unsigned index, chan;
-      LLVMTypeRef vec_type = bld.base.vec_type;
-      LLVMValueRef array_size =
-         lp_build_const_int32(gallivm, info->file_max[TGSI_FILE_INPUT]*4 + 4);
-      bld.inputs_array = lp_build_array_alloca(gallivm,
-                                               vec_type, array_size,
-                                               "input_array");
-
-      assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1);
-
-      for (index = 0; index < info->num_inputs; ++index) {
-         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
-            LLVMValueRef lindex =
-               lp_build_const_int32(gallivm, index * 4 + chan);
-            LLVMValueRef input_ptr =
-               LLVMBuildGEP(gallivm->builder, bld.inputs_array,
-                            &lindex, 1, "");
-            LLVMValueRef value = bld.inputs[index][chan];
-            if (value)
-               LLVMBuildStore(gallivm->builder, value, input_ptr);
-         }
-      }
-   }
 
    bld.system_values_array = system_values_array;
 
-   tgsi_parse_init( &parse, tokens );
-
-   while( !tgsi_parse_end_of_tokens( &parse ) ) {
-      tgsi_parse_token( &parse );
-
-      switch( parse.FullToken.Token.Type ) {
-      case TGSI_TOKEN_TYPE_DECLARATION:
-         /* Inputs already interpolated */
-         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
-         break;
-
-      case TGSI_TOKEN_TYPE_INSTRUCTION:
-         {
-            /* save expanded instruction */
-            if (num_instructions == bld.max_instructions) {
-               struct tgsi_full_instruction *instructions;
-               instructions = REALLOC(bld.instructions,
-                                      bld.max_instructions
-                                      * sizeof(struct tgsi_full_instruction),
-                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
-                                      * sizeof(struct tgsi_full_instruction));
-               if (!instructions) {
-                  break;
-               }
-               bld.instructions = instructions;
-               bld.max_instructions += LP_MAX_INSTRUCTIONS;
-            }
-
-            memcpy(bld.instructions + num_instructions,
-                   &parse.FullToken.FullInstruction,
-                   sizeof(bld.instructions[0]));
-
-            num_instructions++;
-         }
-
-         break;
-
-      case TGSI_TOKEN_TYPE_IMMEDIATE:
-         /* simply copy the immediate values into the next immediates[] slot */
-         {
-            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
-            assert(size <= 4);
-            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
-            for( i = 0; i < size; ++i )
-               bld.immediates[num_immediates][i] =
-                  lp_build_const_vec(gallivm, type, parse.FullToken.FullImmediate.u[i].Float);
-            for( i = size; i < 4; ++i )
-               bld.immediates[num_immediates][i] = bld.base.undef;
-            num_immediates++;
-         }
-         break;
-
-      case TGSI_TOKEN_TYPE_PROPERTY:
-         break;
-
-      default:
-         assert( 0 );
-      }
-   }
-
-   while (pc != -1) {
-      struct tgsi_full_instruction *instr = bld.instructions + pc;
-      const struct tgsi_opcode_info *opcode_info =
-         tgsi_get_opcode_info(instr->Instruction.Opcode);
-      if (!emit_instruction( &bld, instr, opcode_info, &pc ))
-         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
-                       opcode_info->mnemonic);
-   }
-
-   /* If we have indirect addressing in outputs we need to copy our alloca array
-    * to the outputs slots specified by the called */
-   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
-      unsigned index, chan;
-      assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
-      for (index = 0; index < info->num_outputs; ++index) {
-         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
-            bld.outputs[index][chan] = get_output_ptr(&bld, index, chan);
-         }
-      }
-   }
+   lp_build_tgsi_llvm(&bld.bld_base, tokens);
 
    if (0) {
       LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
@@ -2504,7 +1820,6 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
       lp_debug_dump_value(function);
       debug_printf("2222222222222222222222222222 \n");
    }
-   tgsi_parse_free( &parse );
 
    if (0) {
       LLVMModuleRef module = LLVMGetGlobalParent(
@@ -2512,8 +1827,6 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
       LLVMDumpModule(module);
 
    }
-
-   FREE( bld.instructions );
 }