From: Eric Anholt Date: Tue, 27 Jul 2010 00:47:59 +0000 (-0700) Subject: Merge remote branch 'origin/master' into glsl2 X-Git-Tag: mesa-7.9-rc1~1173^2~288 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=afe125e0a18ac3886c45c7e6b02b122fb2d327b5;p=platform%2Fupstream%2Fmesa.git Merge remote branch 'origin/master' into glsl2 This pulls in multiple i965 driver fixes which will help ensure better testing coverage during development, and also gets past the conflicts of the src/mesa/shader -> src/mesa/program move. Conflicts: src/mesa/Makefile src/mesa/main/shaderapi.c src/mesa/main/shaderobj.h --- afe125e0a18ac3886c45c7e6b02b122fb2d327b5 diff --cc src/glsl/Makefile index f4b0fb5,ca7f2d2..462d49e --- a/src/glsl/Makefile +++ b/src/glsl/Makefile @@@ -4,158 -4,12 +4,158 @@@ TOP = ../. include $(TOP)/configs/current -SUBDIRS = pp cl apps +LIBNAME = glsl -default install clean: - @for dir in $(SUBDIRS) ; do \ - if [ -d $$dir ] ; then \ - (cd $$dir && $(MAKE) $@) || exit 1; \ - fi \ - done +LIBGLCPP_SOURCES = \ + glcpp/glcpp-lex.c \ + glcpp/glcpp-parse.c \ + glcpp/pp.c \ + glcpp/xtalloc.c +GLCPP_SOURCES = \ + $(LIBGLCPP_SOURCES) \ + glcpp/glcpp.c + +C_SOURCES = \ + $(LIBGLCPP_SOURCES) + +CXX_SOURCES = \ + ast_expr.cpp \ + ast_function.cpp \ + ast_to_hir.cpp \ + ast_type.cpp \ + builtin_function.cpp \ + glsl_lexer.cpp \ + glsl_parser.cpp \ + glsl_parser_extras.cpp \ + glsl_types.cpp \ + hir_field_selection.cpp \ + ir_basic_block.cpp \ + ir_clone.cpp \ + ir_constant_expression.cpp \ + ir_constant_folding.cpp \ + ir_constant_variable.cpp \ + ir_copy_propagation.cpp \ + ir.cpp \ + ir_dead_code.cpp \ + ir_dead_code_local.cpp \ + ir_div_to_mul_rcp.cpp \ + ir_expression_flattening.cpp \ + ir_function_can_inline.cpp \ + ir_function.cpp \ + ir_function_inlining.cpp \ + ir_hierarchical_visitor.cpp \ + ir_hv_accept.cpp \ + ir_if_return.cpp \ + ir_if_simplification.cpp \ + ir_if_to_cond_assign.cpp \ + ir_import_prototypes.cpp \ + ir_mat_op_to_vec.cpp \ + ir_mod_to_fract.cpp \ + ir_print_visitor.cpp \ + ir_reader.cpp \ + ir_swizzle_swizzle.cpp \ + ir_validate.cpp \ + ir_variable.cpp \ + ir_vec_index_to_cond_assign.cpp \ + ir_vec_index_to_swizzle.cpp \ + linker.cpp \ + link_functions.cpp \ + s_expression.cpp + +LIBS = \ + $(TOP)/src/glsl/libglsl.a \ + $(shell pkg-config --libs talloc) + +APPS = glsl_compiler glcpp/glcpp + +GLSL2_C_SOURCES = \ - ../mesa/shader/hash_table.c \ - ../mesa/shader/symbol_table.c ++ ../mesa/program/hash_table.c \ ++ ../mesa/program/symbol_table.c +GLSL2_CXX_SOURCES = \ + main.cpp + +GLSL2_OBJECTS = \ + $(GLSL2_C_SOURCES:.c=.o) \ + $(GLSL2_CXX_SOURCES:.cpp=.o) + +### Basic defines ### + +DEFINES = \ + $(LIBRARY_DEFINES) \ + $(API_DEFINES) + +GLCPP_OBJECTS = \ + $(GLCPP_SOURCES:.c=.o) \ - ../mesa/shader/hash_table.o ++ ../mesa/program/hash_table.o + +OBJECTS = \ + $(C_SOURCES:.c=.o) \ + $(CXX_SOURCES:.cpp=.o) + +INCLUDES = \ + -I. 
\ + -I../mesa \ + -I../mapi \ - -I../mesa/shader \ ++ -I../mesa/program \ + -I../../include \ + $(LIBRARY_INCLUDES) + +ALL_SOURCES = \ + $(C_SOURCES) \ + $(CXX_SOURCES) \ + $(GLSL2_CXX_SOURCES) \ + $(GLSL2_C_SOURCES) + +##### TARGETS ##### + +default: depend lib$(LIBNAME).a $(APPS) + +lib$(LIBNAME).a: $(OBJECTS) Makefile $(TOP)/src/glsl/Makefile.template + $(MKLIB) -cplusplus -o $(LIBNAME) -static $(OBJECTS) + +depend: $(ALL_SOURCES) Makefile + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(ALL_SOURCES) 2> /dev/null + +# Remove .o and backup files +clean: + rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak + -rm -f $(APPS) + +# Dummy target +install: + @echo -n "" + + +##### RULES ##### + +glsl_compiler: $(GLSL2_OBJECTS) libglsl.a + $(APP_CXX) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(GLSL2_OBJECTS) $(LIBS) -o $@ + +glcpp/glcpp: $(GLCPP_OBJECTS) libglsl.a + $(APP_CC) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(GLCPP_OBJECTS) $(LIBS) -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@ + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ + +glsl_lexer.cpp: glsl_lexer.lpp + flex --never-interactive --outfile="$@" $< + +glsl_parser.cpp: glsl_parser.ypp + bison -v -o "$@" --defines=glsl_parser.h $< + +glcpp/glcpp-lex.c: glcpp/glcpp-lex.l + flex --never-interactive --outfile="$@" $< + +glcpp/glcpp-parse.c: glcpp/glcpp-parse.y + bison -v -o "$@" --defines=glcpp/glcpp-parse.h $< + +builtin_function.cpp: builtins/*/* + ./builtins/tools/generate_builtins.pl > builtin_function.cpp + +-include depend diff --cc src/glsl/glsl_symbol_table.h index 8fbc66c,0000000..27e8255 mode 100644,000000..100644 --- a/src/glsl/glsl_symbol_table.h +++ b/src/glsl/glsl_symbol_table.h @@@ -1,165 -1,0 +1,165 @@@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef GLSL_SYMBOL_TABLE +#define GLSL_SYMBOL_TABLE + +#include + +extern "C" { - #include "symbol_table.h" ++#include "program/symbol_table.h" +} +#include "ir.h" +#include "glsl_types.h" + +/** + * Facade class for _mesa_symbol_table + * + * Wraps the existing \c _mesa_symbol_table data structure to enforce some + * type safe and some symbol table invariants. 
+ */ +class glsl_symbol_table { +private: + enum glsl_symbol_name_space { + glsl_variable_name_space = 0, + glsl_type_name_space = 1, + glsl_function_name_space = 2 + }; + + static int + _glsl_symbol_table_destructor (glsl_symbol_table *table) + { + table->~glsl_symbol_table(); + + return 0; + } + +public: + /* Callers of this talloc-based new need not call delete. It's + * easier to just talloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *table; + + table = talloc_size(ctx, size); + assert(table != NULL); + + talloc_set_destructor(table, (int (*)(void*)) _glsl_symbol_table_destructor); + + return table; + } + + /* If the user *does* call delete, that's OK, we will just + * talloc_free in that case. Here, C++ will have already called the + * destructor so tell talloc not to do that again. */ + static void operator delete(void *table) + { + talloc_set_destructor(table, NULL); + talloc_free(table); + } + + glsl_symbol_table() + { + table = _mesa_symbol_table_ctor(); + } + + ~glsl_symbol_table() + { + _mesa_symbol_table_dtor(table); + } + + void push_scope() + { + _mesa_symbol_table_push_scope(table); + } + + void pop_scope() + { + _mesa_symbol_table_pop_scope(table); + } + + /** + * Determine whether a name was declared at the current scope + */ + bool name_declared_this_scope(const char *name) + { + return _mesa_symbol_table_symbol_scope(table, -1, name) == 0; + } + + /** + * \name Methods to add symbols to the table + * + * There is some temptation to rename all these functions to \c add_symbol + * or similar. However, this breaks symmetry with the getter functions and + * reduces the clarity of the intention of code that uses these methods. + */ + /*@{*/ + bool add_variable(const char *name, ir_variable *v) + { + return _mesa_symbol_table_add_symbol(table, glsl_variable_name_space, + name, v) == 0; + } + + bool add_type(const char *name, const glsl_type *t) + { + return _mesa_symbol_table_add_symbol(table, glsl_type_name_space, + name, (void *) t) == 0; + } + + bool add_function(const char *name, ir_function *f) + { + return _mesa_symbol_table_add_symbol(table, glsl_function_name_space, + name, f) == 0; + } + /*@}*/ + + /** + * \name Methods to get symbols from the table + */ + /*@{*/ + ir_variable *get_variable(const char *name) + { + return (ir_variable *) + _mesa_symbol_table_find_symbol(table, glsl_variable_name_space, name); + } + + glsl_type *get_type(const char *name) + { + return (glsl_type *) + _mesa_symbol_table_find_symbol(table, glsl_type_name_space, name); + } + + ir_function *get_function(const char *name) + { + return (ir_function *) + _mesa_symbol_table_find_symbol(table, glsl_function_name_space, name); + } + /*@}*/ + +private: + struct _mesa_symbol_table *table; +}; + +#endif /* GLSL_SYMBOL_TABLE */ diff --cc src/glsl/linker.cpp index 7c30a40,0000000..ea0274e mode 100644,000000..100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@@ -1,1305 -1,0 +1,1305 @@@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file linker.cpp + * GLSL linker implementation + * + * Given a set of shaders that are to be linked to generate a final program, + * there are three distinct stages. + * + * In the first stage shaders are partitioned into groups based on the shader + * type. All shaders of a particular type (e.g., vertex shaders) are linked + * together. + * + * - Undefined references in each shader are resolved to definitions in + * another shader. + * - Types and qualifiers of uniforms, outputs, and global variables defined + * in multiple shaders with the same name are verified to be the same. + * - Initializers for uniforms and global variables defined + * in multiple shaders with the same name are verified to be the same. + * + * The result, in the terminology of the GLSL spec, is a set of shader + * executables for each processing unit. + * + * After the first stage is complete, a series of semantic checks are performed + * on each of the shader executables. + * + * - Each shader executable must define a \c main function. + * - Each vertex shader executable must write to \c gl_Position. + * - Each fragment shader executable must write to either \c gl_FragData or + * \c gl_FragColor. + * + * In the final stage individual shader executables are linked to create a + * complete executable. + * + * - Types of uniforms defined in multiple shader stages with the same name + * are verified to be the same. + * - Initializers for uniforms defined in multiple shader stages with the + * same name are verified to be the same. + * - Types and qualifiers of outputs defined in one stage are verified to + * be the same as the types and qualifiers of inputs defined with the same + * name in a later stage. + * + * \author Ian Romanick + */ +#include +#include +#include +#include + +extern "C" { +#include +} + +#include "main/mtypes.h" +#include "main/macros.h" ++#include "main/shaderobj.h" +#include "glsl_symbol_table.h" +#include "ir.h" +#include "program.h" +#include "hash_table.h" - #include "shader_api.h" +#include "linker.h" +#include "ir_optimization.h" + +/** + * Visitor that determines whether or not a variable is ever written. + */ +class find_assignment_visitor : public ir_hierarchical_visitor { +public: + find_assignment_visitor(const char *name) + : name(name), found(false) + { + /* empty */ + } + + virtual ir_visitor_status visit_enter(ir_assignment *ir) + { + ir_variable *const var = ir->lhs->variable_referenced(); + + if (strcmp(name, var->name) == 0) { + found = true; + return visit_stop; + } + + return visit_continue_with_parent; + } + + bool variable_found() + { + return found; + } + +private: + const char *name; /**< Find writes to a variable with this name. */ + bool found; /**< Was a write to the variable found? */ +}; + + +void +linker_error_printf(gl_shader_program *prog, const char *fmt, ...) 
+{ + va_list ap; + + prog->InfoLog = talloc_strdup_append(prog->InfoLog, "error: "); + va_start(ap, fmt); + prog->InfoLog = talloc_vasprintf_append(prog->InfoLog, fmt, ap); + va_end(ap); +} + + +void +invalidate_variable_locations(gl_shader *sh, enum ir_variable_mode mode, + int generic_base) +{ + foreach_list(node, sh->ir) { + ir_variable *const var = ((ir_instruction *) node)->as_variable(); + + if ((var == NULL) || (var->mode != (unsigned) mode)) + continue; + + /* Only assign locations for generic attributes / varyings / etc. + */ + if (var->location >= generic_base) + var->location = -1; + } +} + + +/** + * Determine the number of attribute slots required for a particular type + * + * This code is here because it implements the language rules of a specific + * GLSL version. Since it's a property of the language and not a property of + * types in general, it doesn't really belong in glsl_type. + */ +unsigned +count_attribute_slots(const glsl_type *t) +{ + /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec: + * + * "A scalar input counts the same amount against this limit as a vec4, + * so applications may want to consider packing groups of four + * unrelated float inputs together into a vector to better utilize the + * capabilities of the underlying hardware. A matrix input will use up + * multiple locations. The number of locations used will equal the + * number of columns in the matrix." + * + * The spec does not explicitly say how arrays are counted. However, it + * should be safe to assume the total number of slots consumed by an array + * is the number of entries in the array multiplied by the number of slots + * consumed by a single element of the array. + */ + + if (t->is_array()) + return t->array_size() * count_attribute_slots(t->element_type()); + + if (t->is_matrix()) + return t->matrix_columns; + + return 1; +} + + +/** + * Verify that a vertex shader executable meets all semantic requirements + * + * \param shader Vertex shader executable to be verified + */ +bool +validate_vertex_shader_executable(struct gl_shader_program *prog, + struct gl_shader *shader) +{ + if (shader == NULL) + return true; + + find_assignment_visitor find("gl_Position"); + find.run(shader->ir); + if (!find.variable_found()) { + linker_error_printf(prog, + "vertex shader does not write to `gl_Position'\n"); + return false; + } + + return true; +} + + +/** + * Verify that a fragment shader executable meets all semantic requirements + * + * \param shader Fragment shader executable to be verified + */ +bool +validate_fragment_shader_executable(struct gl_shader_program *prog, + struct gl_shader *shader) +{ + if (shader == NULL) + return true; + + find_assignment_visitor frag_color("gl_FragColor"); + find_assignment_visitor frag_data("gl_FragData"); + + frag_color.run(shader->ir); + frag_data.run(shader->ir); + + if (frag_color.variable_found() && frag_data.variable_found()) { + linker_error_printf(prog, "fragment shader writes to both " + "`gl_FragColor' and `gl_FragData'\n"); + return false; + } + + return true; +} + + +/** + * Generate a string describing the mode of a variable + */ +static const char * +mode_string(const ir_variable *var) +{ + switch (var->mode) { + case ir_var_auto: + return (var->read_only) ? 
"global constant" : "global variable"; + + case ir_var_uniform: return "uniform"; + case ir_var_in: return "shader input"; + case ir_var_out: return "shader output"; + case ir_var_inout: return "shader inout"; + + case ir_var_temporary: + default: + assert(!"Should not get here."); + return "invalid variable"; + } +} + + +/** + * Perform validation of global variables used across multiple shaders + */ +bool +cross_validate_globals(struct gl_shader_program *prog, + struct gl_shader **shader_list, + unsigned num_shaders, + bool uniforms_only) +{ + /* Examine all of the uniforms in all of the shaders and cross validate + * them. + */ + glsl_symbol_table variables; + for (unsigned i = 0; i < num_shaders; i++) { + foreach_list(node, shader_list[i]->ir) { + ir_variable *const var = ((ir_instruction *) node)->as_variable(); + + if (var == NULL) + continue; + + if (uniforms_only && (var->mode != ir_var_uniform)) + continue; + + /* Don't cross validate temporaries that are at global scope. These + * will eventually get pulled into the shaders 'main'. + */ + if (var->mode == ir_var_temporary) + continue; + + /* If a global with this name has already been seen, verify that the + * new instance has the same type. In addition, if the globals have + * initializers, the values of the initializers must be the same. + */ + ir_variable *const existing = variables.get_variable(var->name); + if (existing != NULL) { + if (var->type != existing->type) { + linker_error_printf(prog, "%s `%s' declared as type " + "`%s' and type `%s'\n", + mode_string(var), + var->name, var->type->name, + existing->type->name); + return false; + } + + /* FINISHME: Handle non-constant initializers. + */ + if (var->constant_value != NULL) { + if (existing->constant_value != NULL) { + if (!var->constant_value->has_value(existing->constant_value)) { + linker_error_printf(prog, "initializers for %s " + "`%s' have differing values\n", + mode_string(var), var->name); + return false; + } + } else + /* If the first-seen instance of a particular uniform did not + * have an initializer but a later instance does, copy the + * initializer to the version stored in the symbol table. + */ + /* FINISHME: This is wrong. The constant_value field should + * FINISHME: not be modified! Imagine a case where a shader + * FINISHME: without an initializer is linked in two different + * FINISHME: programs with shaders that have differing + * FINISHME: initializers. Linking with the first will + * FINISHME: modify the shader, and linking with the second + * FINISHME: will fail. + */ + existing->constant_value = var->constant_value->clone(NULL); + } + } else + variables.add_variable(var->name, var); + } + } + + return true; +} + + +/** + * Perform validation of uniforms used across multiple shader stages + */ +bool +cross_validate_uniforms(struct gl_shader_program *prog) +{ + return cross_validate_globals(prog, prog->_LinkedShaders, + prog->_NumLinkedShaders, true); +} + + +/** + * Validate that outputs from one stage match inputs of another + */ +bool +cross_validate_outputs_to_inputs(struct gl_shader_program *prog, + gl_shader *producer, gl_shader *consumer) +{ + glsl_symbol_table parameters; + /* FINISHME: Figure these out dynamically. */ + const char *const producer_stage = "vertex"; + const char *const consumer_stage = "fragment"; + + /* Find all shader outputs in the "producer" stage. 
+ */ + foreach_list(node, producer->ir) { + ir_variable *const var = ((ir_instruction *) node)->as_variable(); + + /* FINISHME: For geometry shaders, this should also look for inout + * FINISHME: variables. + */ + if ((var == NULL) || (var->mode != ir_var_out)) + continue; + + parameters.add_variable(var->name, var); + } + + + /* Find all shader inputs in the "consumer" stage. Any variables that have + * matching outputs already in the symbol table must have the same type and + * qualifiers. + */ + foreach_list(node, consumer->ir) { + ir_variable *const input = ((ir_instruction *) node)->as_variable(); + + /* FINISHME: For geometry shaders, this should also look for inout + * FINISHME: variables. + */ + if ((input == NULL) || (input->mode != ir_var_in)) + continue; + + ir_variable *const output = parameters.get_variable(input->name); + if (output != NULL) { + /* Check that the types match between stages. + */ + if (input->type != output->type) { + linker_error_printf(prog, + "%s shader output `%s' delcared as " + "type `%s', but %s shader input declared " + "as type `%s'\n", + producer_stage, output->name, + output->type->name, + consumer_stage, input->type->name); + return false; + } + + /* Check that all of the qualifiers match between stages. + */ + if (input->centroid != output->centroid) { + linker_error_printf(prog, + "%s shader output `%s' %s centroid qualifier, " + "but %s shader input %s centroid qualifier\n", + producer_stage, + output->name, + (output->centroid) ? "has" : "lacks", + consumer_stage, + (input->centroid) ? "has" : "lacks"); + return false; + } + + if (input->invariant != output->invariant) { + linker_error_printf(prog, + "%s shader output `%s' %s invariant qualifier, " + "but %s shader input %s invariant qualifier\n", + producer_stage, + output->name, + (output->invariant) ? "has" : "lacks", + consumer_stage, + (input->invariant) ? "has" : "lacks"); + return false; + } + + if (input->interpolation != output->interpolation) { + linker_error_printf(prog, + "%s shader output `%s' specifies %s " + "interpolation qualifier, " + "but %s shader input specifies %s " + "interpolation qualifier\n", + producer_stage, + output->name, + output->interpolation_string(), + consumer_stage, + input->interpolation_string()); + return false; + } + } + } + + return true; +} + + +/** + * Populates a shaders symbol table with all global declarations + */ +static void +populate_symbol_table(gl_shader *sh) +{ + sh->symbols = new(sh) glsl_symbol_table; + + foreach_list(node, sh->ir) { + ir_instruction *const inst = (ir_instruction *) node; + ir_variable *var; + ir_function *func; + + if ((func = inst->as_function()) != NULL) { + sh->symbols->add_function(func->name, func); + } else if ((var = inst->as_variable()) != NULL) { + sh->symbols->add_variable(var->name, var); + } + } +} + + +/** + * Remap variables referenced in an instruction tree + * + * This is used when instruction trees are cloned from one shader and placed in + * another. These trees will contain references to \c ir_variable nodes that + * do not exist in the target shader. This function finds these \c ir_variable + * references and replaces the references with matching variables in the target + * shader. + * + * If there is no matching variable in the target shader, a clone of the + * \c ir_variable is made and added to the target shader. The new variable is + * added to \b both the instruction stream and the symbol table. + * + * \param inst IR tree that is to be processed. 
+ * \param symbols Symbol table containing global scope symbols in the + * linked shader. + * \param instructions Instruction stream where new variable declarations + * should be added. + */ +void +remap_variables(ir_instruction *inst, glsl_symbol_table *symbols, + exec_list *instructions, hash_table *temps) +{ + class remap_visitor : public ir_hierarchical_visitor { + public: + remap_visitor(glsl_symbol_table *symbols, exec_list *instructions, + hash_table *temps) + { + this->symbols = symbols; + this->instructions = instructions; + this->temps = temps; + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + if (ir->var->mode == ir_var_temporary) { + ir_variable *var = (ir_variable *) hash_table_find(temps, ir->var); + + assert(var != NULL); + ir->var = var; + return visit_continue; + } + + ir_variable *const existing = + this->symbols->get_variable(ir->var->name); + if (existing != NULL) + ir->var = existing; + else { + ir_variable *copy = ir->var->clone(NULL); + + this->symbols->add_variable(copy->name, copy); + this->instructions->push_head(copy); + ir->var = copy; + } + + return visit_continue; + } + + private: + glsl_symbol_table *symbols; + exec_list *instructions; + hash_table *temps; + }; + + remap_visitor v(symbols, instructions, temps); + + inst->accept(&v); +} + + +/** + * Move non-declarations from one instruction stream to another + * + * The intended usage pattern of this function is to pass the pointer to the + * head sentinal of a list (i.e., a pointer to the list cast to an \c exec_node + * pointer) for \c last and \c false for \c make_copies on the first + * call. Successive calls pass the return value of the previous call for + * \c last and \c true for \c make_copies. + * + * \param instructions Source instruction stream + * \param last Instruction after which new instructions should be + * inserted in the target instruction stream + * \param make_copies Flag selecting whether instructions in \c instructions + * should be copied (via \c ir_instruction::clone) into the + * target list or moved. + * + * \return + * The new "last" instruction in the target instruction stream. This pointer + * is suitable for use as the \c last parameter of a later call to this + * function. + */ +exec_node * +move_non_declarations(exec_list *instructions, exec_node *last, + bool make_copies, gl_shader *target) +{ + hash_table *temps = NULL; + + if (make_copies) + temps = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + + foreach_list_safe(node, instructions) { + ir_instruction *inst = (ir_instruction *) node; + + if (inst->as_function()) + continue; + + ir_variable *var = inst->as_variable(); + if ((var != NULL) && (var->mode != ir_var_temporary)) + continue; + + assert(inst->as_assignment() + || ((var != NULL) && (var->mode == ir_var_temporary))); + + if (make_copies) { + inst = inst->clone(NULL); + + if (var != NULL) + hash_table_insert(temps, inst, var); + else + remap_variables(inst, target->symbols, target->ir, temps); + } else { + inst->remove(); + } + + last->insert_after(inst); + last = inst; + } + + if (make_copies) + hash_table_dtor(temps); + + return last; +} + +/** + * Get the function signature for main from a shader + */ +static ir_function_signature * +get_main_function_signature(gl_shader *sh) +{ + ir_function *const f = sh->symbols->get_function("main"); + if (f != NULL) { + exec_list void_parameters; + + /* Look for the 'void main()' signature and ensure that it's defined. 
+ * This keeps the linker from accidentally pick a shader that just + * contains a prototype for main. + * + * We don't have to check for multiple definitions of main (in multiple + * shaders) because that would have already been caught above. + */ + ir_function_signature *sig = f->matching_signature(&void_parameters); + if ((sig != NULL) && sig->is_defined) { + return sig; + } + } + + return NULL; +} + + +/** + * Combine a group of shaders for a single stage to generate a linked shader + * + * \note + * If this function is supplied a single shader, it is cloned, and the new + * shader is returned. + */ +static struct gl_shader * +link_intrastage_shaders(struct gl_shader_program *prog, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + /* Check that global variables defined in multiple shaders are consistent. + */ + if (!cross_validate_globals(prog, shader_list, num_shaders, false)) + return NULL; + + /* Check that there is only a single definition of each function signature + * across all shaders. + */ + for (unsigned i = 0; i < (num_shaders - 1); i++) { + foreach_list(node, shader_list[i]->ir) { + ir_function *const f = ((ir_instruction *) node)->as_function(); + + if (f == NULL) + continue; + + for (unsigned j = i + 1; j < num_shaders; j++) { + ir_function *const other = + shader_list[j]->symbols->get_function(f->name); + + /* If the other shader has no function (and therefore no function + * signatures) with the same name, skip to the next shader. + */ + if (other == NULL) + continue; + + foreach_iter (exec_list_iterator, iter, *f) { + ir_function_signature *sig = + (ir_function_signature *) iter.get(); + + if (!sig->is_defined || sig->is_built_in) + continue; + + ir_function_signature *other_sig = + other->exact_matching_signature(& sig->parameters); + + if ((other_sig != NULL) && other_sig->is_defined + && !other_sig->is_built_in) { + linker_error_printf(prog, + "function `%s' is multiply defined", + f->name); + return NULL; + } + } + } + } + } + + /* Find the shader that defines main, and make a clone of it. + * + * Starting with the clone, search for undefined references. If one is + * found, find the shader that defines it. Clone the reference and add + * it to the shader. Repeat until there are no undefined references or + * until a reference cannot be resolved. + */ + gl_shader *main = NULL; + for (unsigned i = 0; i < num_shaders; i++) { + if (get_main_function_signature(shader_list[i]) != NULL) { + main = shader_list[i]; + break; + } + } + + if (main == NULL) { + linker_error_printf(prog, "%s shader lacks `main'\n", + (shader_list[0]->Type == GL_VERTEX_SHADER) + ? "vertex" : "fragment"); + return NULL; + } + + gl_shader *const linked = _mesa_new_shader(NULL, 0, main->Type); + linked->ir = new(linked) exec_list; + clone_ir_list(linked->ir, main->ir); + + populate_symbol_table(linked); + + /* The a pointer to the main function in the final linked shader (i.e., the + * copy of the original shader that contained the main function). + */ + ir_function_signature *const main_sig = get_main_function_signature(linked); + + /* Move any instructions other than variable declarations or function + * declarations into main. 
+ */ + exec_node *insertion_point = + move_non_declarations(linked->ir, (exec_node *) &main_sig->body, false, + linked); + + for (unsigned i = 0; i < num_shaders; i++) { + if (shader_list[i] == main) + continue; + + insertion_point = move_non_declarations(shader_list[i]->ir, + insertion_point, true, linked); + } + + /* Resolve initializers for global variables in the linked shader. + */ + unsigned num_linking_shaders = num_shaders; + for (unsigned i = 0; i < num_shaders; i++) + num_linking_shaders += shader_list[i]->num_builtins_to_link; + + gl_shader **linking_shaders = + (gl_shader **) calloc(num_linking_shaders, sizeof(gl_shader *)); + + memcpy(linking_shaders, shader_list, + sizeof(linking_shaders[0]) * num_shaders); + + unsigned idx = num_shaders; + for (unsigned i = 0; i < num_shaders; i++) { + memcpy(&linking_shaders[idx], shader_list[i]->builtins_to_link, + sizeof(linking_shaders[0]) * shader_list[i]->num_builtins_to_link); + idx += shader_list[i]->num_builtins_to_link; + } + + assert(idx == num_linking_shaders); + + link_function_calls(prog, linked, linking_shaders, num_linking_shaders); + + free(linking_shaders); + + return linked; +} + + +struct uniform_node { + exec_node link; + struct gl_uniform *u; + unsigned slots; +}; + +void +assign_uniform_locations(struct gl_shader_program *prog) +{ + /* */ + exec_list uniforms; + unsigned total_uniforms = 0; + hash_table *ht = hash_table_ctor(32, hash_table_string_hash, + hash_table_string_compare); + + for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) { + unsigned next_position = 0; + + foreach_list(node, prog->_LinkedShaders[i]->ir) { + ir_variable *const var = ((ir_instruction *) node)->as_variable(); + + if ((var == NULL) || (var->mode != ir_var_uniform)) + continue; + + const unsigned vec4_slots = (var->component_slots() + 3) / 4; + assert(vec4_slots != 0); + + uniform_node *n = (uniform_node *) hash_table_find(ht, var->name); + if (n == NULL) { + n = (uniform_node *) calloc(1, sizeof(struct uniform_node)); + n->u = (gl_uniform *) calloc(vec4_slots, sizeof(struct gl_uniform)); + n->slots = vec4_slots; + + n->u[0].Name = strdup(var->name); + for (unsigned j = 1; j < vec4_slots; j++) + n->u[j].Name = n->u[0].Name; + + hash_table_insert(ht, n, n->u[0].Name); + uniforms.push_tail(& n->link); + total_uniforms += vec4_slots; + } + + if (var->constant_value != NULL) + for (unsigned j = 0; j < vec4_slots; j++) + n->u[j].Initialized = true; + + var->location = next_position; + + for (unsigned j = 0; j < vec4_slots; j++) { + switch (prog->_LinkedShaders[i]->Type) { + case GL_VERTEX_SHADER: + n->u[j].VertPos = next_position; + break; + case GL_FRAGMENT_SHADER: + n->u[j].FragPos = next_position; + break; + case GL_GEOMETRY_SHADER: + /* FINISHME: Support geometry shaders. 
*/ + assert(prog->_LinkedShaders[i]->Type != GL_GEOMETRY_SHADER); + break; + } + + next_position++; + } + } + } + + gl_uniform_list *ul = (gl_uniform_list *) + calloc(1, sizeof(gl_uniform_list)); + + ul->Size = total_uniforms; + ul->NumUniforms = total_uniforms; + ul->Uniforms = (gl_uniform *) calloc(total_uniforms, sizeof(gl_uniform)); + + unsigned idx = 0; + uniform_node *next; + for (uniform_node *node = (uniform_node *) uniforms.head + ; node->link.next != NULL + ; node = next) { + next = (uniform_node *) node->link.next; + + node->link.remove(); + memcpy(&ul->Uniforms[idx], node->u, sizeof(gl_uniform) * node->slots); + idx += node->slots; + + free(node->u); + free(node); + } + + hash_table_dtor(ht); + + prog->Uniforms = ul; +} + + +/** + * Find a contiguous set of available bits in a bitmask + * + * \param used_mask Bits representing used (1) and unused (0) locations + * \param needed_count Number of contiguous bits needed. + * + * \return + * Base location of the available bits on success or -1 on failure. + */ +int +find_available_slots(unsigned used_mask, unsigned needed_count) +{ + unsigned needed_mask = (1 << needed_count) - 1; + const int max_bit_to_test = (8 * sizeof(used_mask)) - needed_count; + + /* The comparison to 32 is redundant, but without it GCC emits "warning: + * cannot optimize possibly infinite loops" for the loop below. + */ + if ((needed_count == 0) || (max_bit_to_test < 0) || (max_bit_to_test > 32)) + return -1; + + for (int i = 0; i <= max_bit_to_test; i++) { + if ((needed_mask & ~used_mask) == needed_mask) + return i; + + needed_mask <<= 1; + } + + return -1; +} + + +bool +assign_attribute_locations(gl_shader_program *prog, unsigned max_attribute_index) +{ + /* Mark invalid attribute locations as being used. + */ + unsigned used_locations = (max_attribute_index >= 32) + ? ~0 : ~((1 << max_attribute_index) - 1); + + gl_shader *const sh = prog->_LinkedShaders[0]; + assert(sh->Type == GL_VERTEX_SHADER); + + /* Operate in a total of four passes. + * + * 1. Invalidate the location assignments for all vertex shader inputs. + * + * 2. Assign locations for inputs that have user-defined (via + * glBindVertexAttribLocation) locatoins. + * + * 3. Sort the attributes without assigned locations by number of slots + * required in decreasing order. Fragmentation caused by attribute + * locations assigned by the application may prevent large attributes + * from having enough contiguous space. + * + * 4. Assign locations to any inputs without assigned locations. + */ + + invalidate_variable_locations(sh, ir_var_in, VERT_ATTRIB_GENERIC0); + + if (prog->Attributes != NULL) { + for (unsigned i = 0; i < prog->Attributes->NumParameters; i++) { + ir_variable *const var = + sh->symbols->get_variable(prog->Attributes->Parameters[i].Name); + + /* Note: attributes that occupy multiple slots, such as arrays or + * matrices, may appear in the attrib array multiple times. + */ + if ((var == NULL) || (var->location != -1)) + continue; + + /* From page 61 of the OpenGL 4.0 spec: + * + * "LinkProgram will fail if the attribute bindings assigned by + * BindAttribLocation do not leave not enough space to assign a + * location for an active matrix attribute or an active attribute + * array, both of which require multiple contiguous generic + * attributes." + * + * Previous versions of the spec contain similar language but omit the + * bit about attribute arrays. 
+ * + * Page 61 of the OpenGL 4.0 spec also says: + * + * "It is possible for an application to bind more than one + * attribute name to the same location. This is referred to as + * aliasing. This will only work if only one of the aliased + * attributes is active in the executable program, or if no path + * through the shader consumes more than one attribute of a set + * of attributes aliased to the same location. A link error can + * occur if the linker determines that every path through the + * shader consumes multiple aliased attributes, but + * implementations are not required to generate an error in this + * case." + * + * These two paragraphs are either somewhat contradictory, or I don't + * fully understand one or both of them. + */ + /* FINISHME: The code as currently written does not support attribute + * FINISHME: location aliasing (see comment above). + */ + const int attr = prog->Attributes->Parameters[i].StateIndexes[0]; + const unsigned slots = count_attribute_slots(var->type); + + /* Mask representing the contiguous slots that will be used by this + * attribute. + */ + const unsigned use_mask = (1 << slots) - 1; + + /* Generate a link error if the set of bits requested for this + * attribute overlaps any previously allocated bits. + */ + if ((~(use_mask << attr) & used_locations) != used_locations) { + linker_error_printf(prog, + "insufficient contiguous attribute locations " + "available for vertex shader input `%s'", + var->name); + return false; + } + + var->location = VERT_ATTRIB_GENERIC0 + attr; + used_locations |= (use_mask << attr); + } + } + + /* Temporary storage for the set of attributes that need locations assigned. + */ + struct temp_attr { + unsigned slots; + ir_variable *var; + + /* Used below in the call to qsort. */ + static int compare(const void *a, const void *b) + { + const temp_attr *const l = (const temp_attr *) a; + const temp_attr *const r = (const temp_attr *) b; + + /* Reversed because we want a descending order sort below. */ + return r->slots - l->slots; + } + } to_assign[16]; + + unsigned num_attr = 0; + + foreach_list(node, sh->ir) { + ir_variable *const var = ((ir_instruction *) node)->as_variable(); + + if ((var == NULL) || (var->mode != ir_var_in)) + continue; + + /* The location was explicitly assigned, nothing to do here. + */ + if (var->location != -1) + continue; + + to_assign[num_attr].slots = count_attribute_slots(var->type); + to_assign[num_attr].var = var; + num_attr++; + } + + /* If all of the attributes were assigned locations by the application (or + * are built-in attributes with fixed locations), return early. This should + * be the common case. + */ + if (num_attr == 0) + return true; + + qsort(to_assign, num_attr, sizeof(to_assign[0]), temp_attr::compare); + + /* VERT_ATTRIB_GENERIC0 is a psdueo-alias for VERT_ATTRIB_POS. It can only + * be explicitly assigned by via glBindAttribLocation. Mark it as reserved + * to prevent it from being automatically allocated below. + */ + used_locations |= (1 << 0); + + for (unsigned i = 0; i < num_attr; i++) { + /* Mask representing the contiguous slots that will be used by this + * attribute. 
+ */ + const unsigned use_mask = (1 << to_assign[i].slots) - 1; + + int location = find_available_slots(used_locations, to_assign[i].slots); + + if (location < 0) { + linker_error_printf(prog, + "insufficient contiguous attribute locations " + "available for vertex shader input `%s'", + to_assign[i].var->name); + return false; + } + + to_assign[i].var->location = VERT_ATTRIB_GENERIC0 + location; + used_locations |= (use_mask << location); + } + + return true; +} + + +void +assign_varying_locations(gl_shader *producer, gl_shader *consumer) +{ + /* FINISHME: Set dynamically when geometry shader support is added. */ + unsigned output_index = VERT_RESULT_VAR0; + unsigned input_index = FRAG_ATTRIB_VAR0; + + /* Operate in a total of three passes. + * + * 1. Assign locations for any matching inputs and outputs. + * + * 2. Mark output variables in the producer that do not have locations as + * not being outputs. This lets the optimizer eliminate them. + * + * 3. Mark input variables in the consumer that do not have locations as + * not being inputs. This lets the optimizer eliminate them. + */ + + invalidate_variable_locations(producer, ir_var_out, VERT_RESULT_VAR0); + invalidate_variable_locations(consumer, ir_var_in, FRAG_ATTRIB_VAR0); + + foreach_list(node, producer->ir) { + ir_variable *const output_var = ((ir_instruction *) node)->as_variable(); + + if ((output_var == NULL) || (output_var->mode != ir_var_out) + || (output_var->location != -1)) + continue; + + ir_variable *const input_var = + consumer->symbols->get_variable(output_var->name); + + if ((input_var == NULL) || (input_var->mode != ir_var_in)) + continue; + + assert(input_var->location == -1); + + /* FINISHME: Location assignment will need some changes when arrays, + * FINISHME: matrices, and structures are allowed as shader inputs / + * FINISHME: outputs. + */ + output_var->location = output_index; + input_var->location = input_index; + + output_index++; + input_index++; + } + + foreach_list(node, producer->ir) { + ir_variable *const var = ((ir_instruction *) node)->as_variable(); + + if ((var == NULL) || (var->mode != ir_var_out)) + continue; + + /* An 'out' variable is only really a shader output if its value is read + * by the following stage. + */ + if (var->location == -1) { + var->shader_out = false; + var->mode = ir_var_auto; + } + } + + foreach_list(node, consumer->ir) { + ir_variable *const var = ((ir_instruction *) node)->as_variable(); + + if ((var == NULL) || (var->mode != ir_var_in)) + continue; + + /* An 'in' variable is only really a shader input if its value is written + * by the previous stage. + */ + var->shader_in = (var->location != -1); + } +} + + +void +link_shaders(struct gl_shader_program *prog) +{ + prog->LinkStatus = false; + prog->Validated = false; + prog->_Used = false; + + if (prog->InfoLog != NULL) + talloc_free(prog->InfoLog); + + prog->InfoLog = talloc_strdup(NULL, ""); + + /* Separate the shaders into groups based on their type. 
+ */ + struct gl_shader **vert_shader_list; + unsigned num_vert_shaders = 0; + struct gl_shader **frag_shader_list; + unsigned num_frag_shaders = 0; + + vert_shader_list = (struct gl_shader **) + calloc(2 * prog->NumShaders, sizeof(struct gl_shader *)); + frag_shader_list = &vert_shader_list[prog->NumShaders]; + + unsigned min_version = UINT_MAX; + unsigned max_version = 0; + for (unsigned i = 0; i < prog->NumShaders; i++) { + min_version = MIN2(min_version, prog->Shaders[i]->Version); + max_version = MAX2(max_version, prog->Shaders[i]->Version); + + switch (prog->Shaders[i]->Type) { + case GL_VERTEX_SHADER: + vert_shader_list[num_vert_shaders] = prog->Shaders[i]; + num_vert_shaders++; + break; + case GL_FRAGMENT_SHADER: + frag_shader_list[num_frag_shaders] = prog->Shaders[i]; + num_frag_shaders++; + break; + case GL_GEOMETRY_SHADER: + /* FINISHME: Support geometry shaders. */ + assert(prog->Shaders[i]->Type != GL_GEOMETRY_SHADER); + break; + } + } + + /* Previous to GLSL version 1.30, different compilation units could mix and + * match shading language versions. With GLSL 1.30 and later, the versions + * of all shaders must match. + */ + assert(min_version >= 110); + assert(max_version <= 130); + if ((max_version >= 130) && (min_version != max_version)) { + linker_error_printf(prog, "all shaders must use same shading " + "language version\n"); + goto done; + } + + prog->Version = max_version; + + /* Link all shaders for a particular stage and validate the result. + */ + prog->_NumLinkedShaders = 0; + if (num_vert_shaders > 0) { + gl_shader *const sh = + link_intrastage_shaders(prog, vert_shader_list, num_vert_shaders); + + if (sh == NULL) + goto done; + + if (!validate_vertex_shader_executable(prog, sh)) + goto done; + + prog->_LinkedShaders[prog->_NumLinkedShaders] = sh; + prog->_NumLinkedShaders++; + } + + if (num_frag_shaders > 0) { + gl_shader *const sh = + link_intrastage_shaders(prog, frag_shader_list, num_frag_shaders); + + if (sh == NULL) + goto done; + + if (!validate_fragment_shader_executable(prog, sh)) + goto done; + + prog->_LinkedShaders[prog->_NumLinkedShaders] = sh; + prog->_NumLinkedShaders++; + } + + /* Here begins the inter-stage linking phase. Some initial validation is + * performed, then locations are assigned for uniforms, attributes, and + * varyings. + */ + if (cross_validate_uniforms(prog)) { + /* Validate the inputs of each stage with the output of the preceeding + * stage. + */ + for (unsigned i = 1; i < prog->_NumLinkedShaders; i++) { + if (!cross_validate_outputs_to_inputs(prog, + prog->_LinkedShaders[i - 1], + prog->_LinkedShaders[i])) + goto done; + } + + prog->LinkStatus = true; + } + + /* FINISHME: Perform whole-program optimization here. 
*/ + for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) { + /* Optimization passes */ + bool progress; + exec_list *ir = prog->_LinkedShaders[i]->ir; + + /* Lowering */ + do_mat_op_to_vec(ir); + do_mod_to_fract(ir); + do_div_to_mul_rcp(ir); + + do { + progress = false; + + progress = do_function_inlining(ir) || progress; + progress = do_if_simplification(ir) || progress; + progress = do_copy_propagation(ir) || progress; + progress = do_dead_code_local(ir) || progress; +#if 0 + progress = do_dead_code_unlinked(state, ir) || progress; +#endif + progress = do_constant_variable_unlinked(ir) || progress; + progress = do_constant_folding(ir) || progress; + progress = do_if_return(ir) || progress; +#if 0 + if (ctx->Shader.EmitNoIfs) + progress = do_if_to_cond_assign(ir) || progress; +#endif + + progress = do_vec_index_to_swizzle(ir) || progress; + /* Do this one after the previous to let the easier pass handle + * constant vector indexing. + */ + progress = do_vec_index_to_cond_assign(ir) || progress; + + progress = do_swizzle_swizzle(ir) || progress; + } while (progress); + } + + assign_uniform_locations(prog); + + if (prog->_LinkedShaders[0]->Type == GL_VERTEX_SHADER) + /* FINISHME: The value of the max_attribute_index parameter is + * FINISHME: implementation dependent based on the value of + * FINISHME: GL_MAX_VERTEX_ATTRIBS. GL_MAX_VERTEX_ATTRIBS must be + * FINISHME: at least 16, so hardcode 16 for now. + */ + if (!assign_attribute_locations(prog, 16)) + goto done; + + for (unsigned i = 1; i < prog->_NumLinkedShaders; i++) + assign_varying_locations(prog->_LinkedShaders[i - 1], + prog->_LinkedShaders[i]); + + /* FINISHME: Assign fragment shader output locations. */ + +done: + free(vert_shader_list); +} diff --cc src/glsl/program.h index bb1cd91,0000000..0a49203 mode 100644,000000..100644 --- a/src/glsl/program.h +++ b/src/glsl/program.h @@@ -1,33 -1,0 +1,33 @@@ +/* + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include "main/mtypes.h" + +extern "C" { - #include "shader/prog_parameter.h" - #include "shader/prog_uniform.h" ++#include "program/prog_parameter.h" ++#include "program/prog_uniform.h" +} + +extern void +link_shaders(struct gl_shader_program *prog); diff --cc src/mesa/main/shaderapi.c index 1c16653,9cb2391..89b9557 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@@ -38,333 -38,20 +38,21 @@@ #include "main/glheader.h" #include "main/context.h" + #include "main/dispatch.h" + #include "main/enums.h" #include "main/hash.h" - #include "shader/program.h" - #include "shader/prog_parameter.h" - #include "shader/prog_uniform.h" - #include "shader/shader_api.h" - #include "shader/uniforms.h" + #include "main/shaderapi.h" + #include "main/shaderobj.h" + #include "program/program.h" + #include "program/prog_parameter.h" + #include "program/prog_uniform.h" + #include "slang/slang_compile.h" + #include "slang/slang_link.h" +#include "talloc.h" - /** - * Allocate a new gl_shader_program object, initialize it. - */ - static struct gl_shader_program * - _mesa_new_shader_program(GLcontext *ctx, GLuint name) - { - struct gl_shader_program *shProg; - shProg = talloc_zero(NULL, struct gl_shader_program); - if (shProg) { - shProg->Type = GL_SHADER_PROGRAM_MESA; - shProg->Name = name; - shProg->RefCount = 1; - shProg->Attributes = _mesa_new_parameter_list(); - } - return shProg; - } - - - /** - * Clear (free) the shader program state that gets produced by linking. - */ - void - _mesa_clear_shader_program_data(GLcontext *ctx, - struct gl_shader_program *shProg) - { - _mesa_reference_vertprog(ctx, &shProg->VertexProgram, NULL); - _mesa_reference_fragprog(ctx, &shProg->FragmentProgram, NULL); - - if (shProg->Uniforms) { - _mesa_free_uniform_list(shProg->Uniforms); - shProg->Uniforms = NULL; - } - - if (shProg->Varying) { - _mesa_free_parameter_list(shProg->Varying); - shProg->Varying = NULL; - } - } - - - /** - * Free all the data that hangs off a shader program object, but not the - * object itself. - */ - void - _mesa_free_shader_program_data(GLcontext *ctx, - struct gl_shader_program *shProg) - { - GLuint i; - - assert(shProg->Type == GL_SHADER_PROGRAM_MESA); - - _mesa_clear_shader_program_data(ctx, shProg); - - if (shProg->Attributes) { - _mesa_free_parameter_list(shProg->Attributes); - shProg->Attributes = NULL; - } - - /* detach shaders */ - for (i = 0; i < shProg->NumShaders; i++) { - _mesa_reference_shader(ctx, &shProg->Shaders[i], NULL); - } - shProg->NumShaders = 0; - - if (shProg->Shaders) { - free(shProg->Shaders); - shProg->Shaders = NULL; - } - - if (shProg->InfoLog) { - talloc_free(shProg->InfoLog); - shProg->InfoLog = NULL; - } - - /* Transform feedback varying vars */ - for (i = 0; i < shProg->TransformFeedback.NumVarying; i++) { - free(shProg->TransformFeedback.VaryingNames[i]); - } - free(shProg->TransformFeedback.VaryingNames); - shProg->TransformFeedback.VaryingNames = NULL; - shProg->TransformFeedback.NumVarying = 0; - } - - - /** - * Free/delete a shader program object. - */ - void - _mesa_free_shader_program(GLcontext *ctx, struct gl_shader_program *shProg) - { - _mesa_free_shader_program_data(ctx, shProg); - - talloc_free(shProg); - } - - - /** - * Set ptr to point to shProg. - * If ptr is pointing to another object, decrement its refcount (and delete - * if refcount hits zero). - * Then set ptr to point to shProg, incrementing its refcount. 
- */ - /* XXX this could be static */ - void - _mesa_reference_shader_program(GLcontext *ctx, - struct gl_shader_program **ptr, - struct gl_shader_program *shProg) - { - assert(ptr); - if (*ptr == shProg) { - /* no-op */ - return; - } - if (*ptr) { - /* Unreference the old shader program */ - GLboolean deleteFlag = GL_FALSE; - struct gl_shader_program *old = *ptr; - - ASSERT(old->RefCount > 0); - old->RefCount--; - #if 0 - printf("ShaderProgram %p ID=%u RefCount-- to %d\n", - (void *) old, old->Name, old->RefCount); - #endif - deleteFlag = (old->RefCount == 0); - - if (deleteFlag) { - _mesa_HashRemove(ctx->Shared->ShaderObjects, old->Name); - _mesa_free_shader_program(ctx, old); - } - - *ptr = NULL; - } - assert(!*ptr); - - if (shProg) { - shProg->RefCount++; - #if 0 - printf("ShaderProgram %p ID=%u RefCount++ to %d\n", - (void *) shProg, shProg->Name, shProg->RefCount); - #endif - *ptr = shProg; - } - } - - - /** - * Lookup a GLSL program object. - */ - struct gl_shader_program * - _mesa_lookup_shader_program(GLcontext *ctx, GLuint name) - { - struct gl_shader_program *shProg; - if (name) { - shProg = (struct gl_shader_program *) - _mesa_HashLookup(ctx->Shared->ShaderObjects, name); - /* Note that both gl_shader and gl_shader_program objects are kept - * in the same hash table. Check the object's type to be sure it's - * what we're expecting. - */ - if (shProg && shProg->Type != GL_SHADER_PROGRAM_MESA) { - return NULL; - } - return shProg; - } - return NULL; - } - - - /** - * As above, but record an error if program is not found. - */ - struct gl_shader_program * - _mesa_lookup_shader_program_err(GLcontext *ctx, GLuint name, - const char *caller) - { - if (!name) { - _mesa_error(ctx, GL_INVALID_VALUE, caller); - return NULL; - } - else { - struct gl_shader_program *shProg = (struct gl_shader_program *) - _mesa_HashLookup(ctx->Shared->ShaderObjects, name); - if (!shProg) { - _mesa_error(ctx, GL_INVALID_VALUE, caller); - return NULL; - } - if (shProg->Type != GL_SHADER_PROGRAM_MESA) { - _mesa_error(ctx, GL_INVALID_OPERATION, caller); - return NULL; - } - return shProg; - } - } - - - - - /** - * Allocate a new gl_shader object, initialize it. - */ - struct gl_shader * - _mesa_new_shader(GLcontext *ctx, GLuint name, GLenum type) - { - struct gl_shader *shader; - assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER); - shader = talloc_zero(NULL, struct gl_shader); - if (shader) { - shader->Type = type; - shader->Name = name; - shader->RefCount = 1; - } - return shader; - } - - - void - _mesa_free_shader(GLcontext *ctx, struct gl_shader *sh) - { - if (sh->Source) - free((void *) sh->Source); - _mesa_reference_program(ctx, &sh->Program, NULL); - talloc_free(sh); - } - - - /** - * Set ptr to point to sh. - * If ptr is pointing to another shader, decrement its refcount (and delete - * if refcount hits zero). - * Then set ptr to point to sh, incrementing its refcount. 
- */ - /* XXX this could be static */ - void - _mesa_reference_shader(GLcontext *ctx, struct gl_shader **ptr, - struct gl_shader *sh) - { - assert(ptr); - if (*ptr == sh) { - /* no-op */ - return; - } - if (*ptr) { - /* Unreference the old shader */ - GLboolean deleteFlag = GL_FALSE; - struct gl_shader *old = *ptr; - - ASSERT(old->RefCount > 0); - old->RefCount--; - /*printf("SHADER DECR %p (%d) to %d\n", - (void*) old, old->Name, old->RefCount);*/ - deleteFlag = (old->RefCount == 0); - - if (deleteFlag) { - _mesa_HashRemove(ctx->Shared->ShaderObjects, old->Name); - _mesa_free_shader(ctx, old); - } - - *ptr = NULL; - } - assert(!*ptr); - - if (sh) { - /* reference new */ - sh->RefCount++; - /*printf("SHADER INCR %p (%d) to %d\n", - (void*) sh, sh->Name, sh->RefCount);*/ - *ptr = sh; - } - } - - - /** - * Lookup a GLSL shader object. - */ - struct gl_shader * - _mesa_lookup_shader(GLcontext *ctx, GLuint name) - { - if (name) { - struct gl_shader *sh = (struct gl_shader *) - _mesa_HashLookup(ctx->Shared->ShaderObjects, name); - /* Note that both gl_shader and gl_shader_program objects are kept - * in the same hash table. Check the object's type to be sure it's - * what we're expecting. - */ - if (sh && sh->Type == GL_SHADER_PROGRAM_MESA) { - return NULL; - } - return sh; - } - return NULL; - } - - /** - * As above, but record an error if shader is not found. - */ - static struct gl_shader * - _mesa_lookup_shader_err(GLcontext *ctx, GLuint name, const char *caller) - { - if (!name) { - _mesa_error(ctx, GL_INVALID_VALUE, caller); - return NULL; - } - else { - struct gl_shader *sh = (struct gl_shader *) - _mesa_HashLookup(ctx->Shared->ShaderObjects, name); - if (!sh) { - _mesa_error(ctx, GL_INVALID_VALUE, caller); - return NULL; - } - if (sh->Type == GL_SHADER_PROGRAM_MESA) { - _mesa_error(ctx, GL_INVALID_OPERATION, caller); - return NULL; - } - return sh; - } - } + /** Define this to enable shader substitution (see below) */ + #define SHADER_SUBST 0 /** diff --cc src/mesa/main/shaderobj.c index 0000000,14bbb2e..129d974 mode 000000,100644..100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@@ -1,0 -1,392 +1,390 @@@ + /* + * Mesa 3-D graphics library + * + * Copyright (C) 2004-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009-2010 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + + /** + * \file shaderobj.c + * \author Brian Paul + * + */ + + + #include "main/glheader.h" + #include "main/context.h" + #include "main/hash.h" + #include "main/shaderobj.h" + #include "program/program.h" + #include "program/prog_parameter.h" + #include "program/prog_uniform.h" - ++#include "talloc.h" + + /**********************************************************************/ + /*** Shader object functions ***/ + /**********************************************************************/ + + + /** + * Set ptr to point to sh. + * If ptr is pointing to another shader, decrement its refcount (and delete + * if refcount hits zero). + * Then set ptr to point to sh, incrementing its refcount. + */ + void + _mesa_reference_shader(GLcontext *ctx, struct gl_shader **ptr, + struct gl_shader *sh) + { + assert(ptr); + if (*ptr == sh) { + /* no-op */ + return; + } + if (*ptr) { + /* Unreference the old shader */ + GLboolean deleteFlag = GL_FALSE; + struct gl_shader *old = *ptr; + + ASSERT(old->RefCount > 0); + old->RefCount--; + /*printf("SHADER DECR %p (%d) to %d\n", + (void*) old, old->Name, old->RefCount);*/ + deleteFlag = (old->RefCount == 0); + + if (deleteFlag) { + _mesa_HashRemove(ctx->Shared->ShaderObjects, old->Name); + ctx->Driver.DeleteShader(ctx, old); + } + + *ptr = NULL; + } + assert(!*ptr); + + if (sh) { + /* reference new */ + sh->RefCount++; + /*printf("SHADER INCR %p (%d) to %d\n", + (void*) sh, sh->Name, sh->RefCount);*/ + *ptr = sh; + } + } + + + /** + * Allocate a new gl_shader object, initialize it. + * Called via ctx->Driver.NewShader() + */ -static struct gl_shader * ++struct gl_shader * + _mesa_new_shader(GLcontext *ctx, GLuint name, GLenum type) + { + struct gl_shader *shader; + assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || + type == GL_GEOMETRY_SHADER_ARB); - shader = CALLOC_STRUCT(gl_shader); ++ shader = talloc_zero(NULL, struct gl_shader); + if (shader) { + shader->Type = type; + shader->Name = name; + shader->RefCount = 1; + } + return shader; + } + + + /** + * Delete a shader object. + * Called via ctx->Driver.DeleteShader(). + */ + static void + __mesa_delete_shader(GLcontext *ctx, struct gl_shader *sh) + { + if (sh->Source) + free((void *) sh->Source); - if (sh->InfoLog) - free(sh->InfoLog); + _mesa_reference_program(ctx, &sh->Program, NULL); - free(sh); ++ talloc_free(sh); + } + + + /** + * Lookup a GLSL shader object. + */ + struct gl_shader * + _mesa_lookup_shader(GLcontext *ctx, GLuint name) + { + if (name) { + struct gl_shader *sh = (struct gl_shader *) + _mesa_HashLookup(ctx->Shared->ShaderObjects, name); + /* Note that both gl_shader and gl_shader_program objects are kept + * in the same hash table. Check the object's type to be sure it's + * what we're expecting. + */ + if (sh && sh->Type == GL_SHADER_PROGRAM_MESA) { + return NULL; + } + return sh; + } + return NULL; + } + + + /** + * As above, but record an error if shader is not found. 
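A side effect of this hunk worth noting: allocation moves from CALLOC_STRUCT/free to talloc (talloc_zero in _mesa_new_shader, talloc_free in the delete path), so the shader object becomes a talloc context and later allocations parented to it are released by that single talloc_free. Below is a hedged, standalone sketch of that ownership model using libtalloc directly (build with -ltalloc); the shader_like struct is made up for the example.

/* Illustrative sketch of talloc-style ownership, not Mesa code. */
#include <talloc.h>
#include <stdio.h>

struct shader_like {                /* stand-in for struct gl_shader */
   unsigned name;
   char *source;                    /* parented to the object below */
};

int main(void)
{
   /* NULL parent: the object is a root context, like _mesa_new_shader(). */
   struct shader_like *sh = talloc_zero(NULL, struct shader_like);
   if (!sh)
      return 1;

   sh->name = 42;
   /* Child allocation: owned by sh, freed together with it. */
   sh->source = talloc_strdup(sh, "void main() { gl_FragColor = vec4(1.0); }");

   printf("shader %u: %s\n", sh->name, sh->source);

   /* One call releases the object and every child allocation. */
   talloc_free(sh);
   return 0;
}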
+ */ + struct gl_shader * + _mesa_lookup_shader_err(GLcontext *ctx, GLuint name, const char *caller) + { + if (!name) { + _mesa_error(ctx, GL_INVALID_VALUE, caller); + return NULL; + } + else { + struct gl_shader *sh = (struct gl_shader *) + _mesa_HashLookup(ctx->Shared->ShaderObjects, name); + if (!sh) { + _mesa_error(ctx, GL_INVALID_VALUE, caller); + return NULL; + } + if (sh->Type == GL_SHADER_PROGRAM_MESA) { + _mesa_error(ctx, GL_INVALID_OPERATION, caller); + return NULL; + } + return sh; + } + } + + + + /**********************************************************************/ + /*** Shader Program object functions ***/ + /**********************************************************************/ + + + /** + * Set ptr to point to shProg. + * If ptr is pointing to another object, decrement its refcount (and delete + * if refcount hits zero). + * Then set ptr to point to shProg, incrementing its refcount. + */ + void + _mesa_reference_shader_program(GLcontext *ctx, + struct gl_shader_program **ptr, + struct gl_shader_program *shProg) + { + assert(ptr); + if (*ptr == shProg) { + /* no-op */ + return; + } + if (*ptr) { + /* Unreference the old shader program */ + GLboolean deleteFlag = GL_FALSE; + struct gl_shader_program *old = *ptr; + + ASSERT(old->RefCount > 0); + old->RefCount--; + #if 0 + printf("ShaderProgram %p ID=%u RefCount-- to %d\n", + (void *) old, old->Name, old->RefCount); + #endif + deleteFlag = (old->RefCount == 0); + + if (deleteFlag) { + _mesa_HashRemove(ctx->Shared->ShaderObjects, old->Name); + ctx->Driver.DeleteShaderProgram(ctx, old); + } + + *ptr = NULL; + } + assert(!*ptr); + + if (shProg) { + shProg->RefCount++; + #if 0 + printf("ShaderProgram %p ID=%u RefCount++ to %d\n", + (void *) shProg, shProg->Name, shProg->RefCount); + #endif + *ptr = shProg; + } + } + + + /** + * Allocate a new gl_shader_program object, initialize it. + * Called via ctx->Driver.NewShaderProgram() + */ + static struct gl_shader_program * + _mesa_new_shader_program(GLcontext *ctx, GLuint name) + { + struct gl_shader_program *shProg; - shProg = CALLOC_STRUCT(gl_shader_program); ++ shProg = talloc_zero(NULL, struct gl_shader_program); + if (shProg) { + shProg->Type = GL_SHADER_PROGRAM_MESA; + shProg->Name = name; + shProg->RefCount = 1; + shProg->Attributes = _mesa_new_parameter_list(); + #if FEATURE_ARB_geometry_shader4 + shProg->Geom.VerticesOut = 0; + shProg->Geom.InputType = GL_TRIANGLES; + shProg->Geom.OutputType = GL_TRIANGLE_STRIP; + #endif + } + return shProg; + } + + + /** + * Clear (free) the shader program state that gets produced by linking. + */ + void + _mesa_clear_shader_program_data(GLcontext *ctx, + struct gl_shader_program *shProg) + { + _mesa_reference_vertprog(ctx, &shProg->VertexProgram, NULL); + _mesa_reference_fragprog(ctx, &shProg->FragmentProgram, NULL); + _mesa_reference_geomprog(ctx, &shProg->GeometryProgram, NULL); + + if (shProg->Uniforms) { + _mesa_free_uniform_list(shProg->Uniforms); + shProg->Uniforms = NULL; + } + + if (shProg->Varying) { + _mesa_free_parameter_list(shProg->Varying); + shProg->Varying = NULL; + } + } + + + /** + * Free all the data that hangs off a shader program object, but not the + * object itself. 
+ */ + void + _mesa_free_shader_program_data(GLcontext *ctx, + struct gl_shader_program *shProg) + { + GLuint i; + + assert(shProg->Type == GL_SHADER_PROGRAM_MESA); + + _mesa_clear_shader_program_data(ctx, shProg); + + if (shProg->Attributes) { + _mesa_free_parameter_list(shProg->Attributes); + shProg->Attributes = NULL; + } + + /* detach shaders */ + for (i = 0; i < shProg->NumShaders; i++) { + _mesa_reference_shader(ctx, &shProg->Shaders[i], NULL); + } + shProg->NumShaders = 0; + + if (shProg->Shaders) { + free(shProg->Shaders); + shProg->Shaders = NULL; + } + + if (shProg->InfoLog) { - free(shProg->InfoLog); ++ talloc_free(shProg->InfoLog); + shProg->InfoLog = NULL; + } + + /* Transform feedback varying vars */ + for (i = 0; i < shProg->TransformFeedback.NumVarying; i++) { + free(shProg->TransformFeedback.VaryingNames[i]); + } + free(shProg->TransformFeedback.VaryingNames); + shProg->TransformFeedback.VaryingNames = NULL; + shProg->TransformFeedback.NumVarying = 0; + } + + + /** + * Free/delete a shader program object. + * Called via ctx->Driver.DeleteShaderProgram(). + */ + static void + __mesa_delete_shader_program(GLcontext *ctx, struct gl_shader_program *shProg) + { + _mesa_free_shader_program_data(ctx, shProg); + - free(shProg); ++ talloc_free(shProg); + } + + + /** + * Lookup a GLSL program object. + */ + struct gl_shader_program * + _mesa_lookup_shader_program(GLcontext *ctx, GLuint name) + { + struct gl_shader_program *shProg; + if (name) { + shProg = (struct gl_shader_program *) + _mesa_HashLookup(ctx->Shared->ShaderObjects, name); + /* Note that both gl_shader and gl_shader_program objects are kept + * in the same hash table. Check the object's type to be sure it's + * what we're expecting. + */ + if (shProg && shProg->Type != GL_SHADER_PROGRAM_MESA) { + return NULL; + } + return shProg; + } + return NULL; + } + + + /** + * As above, but record an error if program is not found. 
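Both lookup helpers in this file repeat the same invariant: gl_shader and gl_shader_program objects live in one shared name space, so a lookup must check the stored object's Type before believing the cast. Below is a small standalone sketch of that check, with a flat array standing in for _mesa_HashLookup and invented tag values.

/* Illustrative sketch, not Mesa code: one name space for two object kinds,
 * with a type tag checked on lookup. */
#include <stdio.h>
#include <stddef.h>

enum obj_type { OBJ_SHADER = 1, OBJ_SHADER_PROGRAM = 2 };

struct object {                     /* common header of both object kinds */
   enum obj_type Type;
   unsigned Name;
};

#define MAX_OBJECTS 16
static struct object *table[MAX_OBJECTS];   /* stand-in for the hash table */

static struct object *lookup(unsigned name, enum obj_type expected)
{
   struct object *obj = (name < MAX_OBJECTS) ? table[name] : NULL;

   /* Present but of the other kind: treat as "not found", exactly like
    * _mesa_lookup_shader() returning NULL for GL_SHADER_PROGRAM_MESA. */
   if (obj && obj->Type != expected)
      return NULL;
   return obj;
}

int main(void)
{
   static struct object sh   = { OBJ_SHADER, 3 };
   static struct object prog = { OBJ_SHADER_PROGRAM, 4 };

   table[sh.Name] = &sh;
   table[prog.Name] = &prog;

   printf("lookup 3 as shader:  %p\n", (void *) lookup(3, OBJ_SHADER));
   printf("lookup 3 as program: %p\n", (void *) lookup(3, OBJ_SHADER_PROGRAM));
   return 0;
}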
+ */ + struct gl_shader_program * + _mesa_lookup_shader_program_err(GLcontext *ctx, GLuint name, + const char *caller) + { + if (!name) { + _mesa_error(ctx, GL_INVALID_VALUE, caller); + return NULL; + } + else { + struct gl_shader_program *shProg = (struct gl_shader_program *) + _mesa_HashLookup(ctx->Shared->ShaderObjects, name); + if (!shProg) { + _mesa_error(ctx, GL_INVALID_VALUE, caller); + return NULL; + } + if (shProg->Type != GL_SHADER_PROGRAM_MESA) { + _mesa_error(ctx, GL_INVALID_OPERATION, caller); + return NULL; + } + return shProg; + } + } + + + void + _mesa_init_shader_object_functions(struct dd_function_table *driver) + { + driver->NewShader = _mesa_new_shader; + driver->DeleteShader = __mesa_delete_shader; + driver->NewShaderProgram = _mesa_new_shader_program; + driver->DeleteShaderProgram = __mesa_delete_shader_program; + } diff --cc src/mesa/main/shaderobj.h index 22f582a,d6b37b4..b48244d --- a/src/mesa/main/shaderobj.h +++ b/src/mesa/main/shaderobj.h @@@ -24,26 -23,12 +23,26 @@@ */ - #ifndef SHADER_API_H - #define SHADER_API_H + #ifndef SHADEROBJ_H + #define SHADEROBJ_H -#include "glheader.h" -#include "mtypes.h" +#include "main/glheader.h" +#include "main/mtypes.h" - #include "ir_to_mesa.h" ++#include "program/ir_to_mesa.h" + +#ifdef __cplusplus +extern "C" { +#endif +/** + * Internal functions + */ + +extern void +_mesa_init_shader_state(GLcontext * ctx); + +extern void +_mesa_free_shader_state(GLcontext *ctx); extern void @@@ -74,6 -48,6 +62,9 @@@ _mesa_reference_shader_program(GLcontex struct gl_shader_program **ptr, struct gl_shader_program *shProg); ++extern struct gl_shader * ++_mesa_new_shader(GLcontext *ctx, GLuint name, GLenum type); ++ extern struct gl_shader_program * _mesa_lookup_shader_program(GLcontext *ctx, GLuint name); @@@ -95,25 -66,13 +83,16 @@@ _mesa_free_shader_program_data(GLcontex extern void - _mesa_update_shader_textures_used(struct gl_program *prog); - + _mesa_init_shader_object_functions(struct dd_function_table *driver); extern void - _mesa_use_program(GLcontext *ctx, GLuint program); - - - extern GLboolean - _mesa_validate_shader_program(GLcontext *ctx, - const struct gl_shader_program *shProg, - char *errMsg); + _mesa_init_shader_state(GLcontext *ctx); extern void - _mesa_init_glsl_driver_functions(struct dd_function_table *driver); + _mesa_free_shader_state(GLcontext *ctx); +#ifdef __cplusplus +}; +#endif - #endif /* SHADER_API_H */ + #endif /* SHADEROBJ_H */ diff --cc src/mesa/program/ir_to_mesa.cpp index 89cad8a,0000000..1903b8f mode 100644,000000..100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@@ -1,2308 -1,0 +1,2309 @@@ +/* + * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. + * Copyright (C) 2008 VMware, Inc. All Rights Reserved. + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
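_mesa_init_shader_object_functions() at the end of the shaderobj.c hunk installs the default allocators into the driver's function table so a driver can override them. Below is a minimal sketch of that pattern; driver_functions and the default_* helpers are invented names, not the real dd_function_table entries.

/* Illustrative sketch, not Mesa code: core installs defaults into a
 * driver-overridable table of function pointers. */
#include <stdio.h>

struct shader;                                 /* opaque in this sketch */

struct driver_functions {                      /* stand-in for dd_function_table */
   struct shader *(*NewShader)(unsigned name);
   void (*DeleteShader)(struct shader *sh);
};

static struct shader *default_new_shader(unsigned name)
{
   printf("default NewShader(%u)\n", name);
   return NULL;                                /* allocation elided here */
}

static void default_delete_shader(struct shader *sh)
{
   (void) sh;
   printf("default DeleteShader\n");
}

/* Core code installs defaults; a driver may later overwrite either pointer. */
static void init_shader_object_functions(struct driver_functions *driver)
{
   driver->NewShader = default_new_shader;
   driver->DeleteShader = default_delete_shader;
}

int main(void)
{
   struct driver_functions driver;
   init_shader_object_functions(&driver);
   driver.DeleteShader(driver.NewShader(1));
   return 0;
}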
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_to_mesa.cpp + * + * Translates the IR to ARB_fragment_program text if possible, + * printing the result + */ + +#include +#include "ir.h" +#include "ir_visitor.h" +#include "ir_print_visitor.h" +#include "ir_expression_flattening.h" +#include "glsl_types.h" +#include "glsl_parser_extras.h" +#include "../glsl/program.h" +#include "ir_optimization.h" +#include "ast.h" + +extern "C" { +#include "main/mtypes.h" - #include "shader/prog_instruction.h" - #include "shader/prog_optimize.h" - #include "shader/prog_print.h" - #include "shader/program.h" - #include "shader/prog_uniform.h" - #include "shader/prog_parameter.h" - #include "shader/shader_api.h" ++#include "main/shaderobj.h" ++#include "main/uniforms.h" ++#include "program/prog_instruction.h" ++#include "program/prog_optimize.h" ++#include "program/prog_print.h" ++#include "program/program.h" ++#include "program/prog_uniform.h" ++#include "program/prog_parameter.h" +} + +/** + * This struct is a corresponding struct to Mesa prog_src_register, with + * wider fields. + */ +typedef struct ir_to_mesa_src_reg { + int file; /**< PROGRAM_* from Mesa */ + int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ + GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ + int negate; /**< NEGATE_XYZW mask from mesa */ + /** Register index should be offset by the integer in this reg. */ + ir_to_mesa_src_reg *reladdr; +} ir_to_mesa_src_reg; + +typedef struct ir_to_mesa_dst_reg { + int file; /**< PROGRAM_* from Mesa */ + int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ + int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ + GLuint cond_mask:4; + /** Register index should be offset by the integer in this reg. */ + ir_to_mesa_src_reg *reladdr; +} ir_to_mesa_dst_reg; + +extern ir_to_mesa_src_reg ir_to_mesa_undef; + +class ir_to_mesa_instruction : public exec_node { +public: + enum prog_opcode op; + ir_to_mesa_dst_reg dst_reg; + ir_to_mesa_src_reg src_reg[3]; + /** Pointer to the ir source this tree came from for debugging */ + ir_instruction *ir; + GLboolean cond_update; + int sampler; /**< sampler index */ + int tex_target; /**< One of TEXTURE_*_INDEX */ + GLboolean tex_shadow; + + class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */ +}; + +class variable_storage : public exec_node { +public: + variable_storage(ir_variable *var, int file, int index) + : file(file), index(index), var(var) + { + /* empty */ + } + + int file; + int index; + ir_variable *var; /* variable that maps to this, if any */ +}; + +class function_entry : public exec_node { +public: + ir_function_signature *sig; + + /** + * identifier of this function signature used by the program. + * + * At the point that Mesa instructions for function calls are + * generated, we don't know the address of the first instruction of + * the function body. So we make the BranchTarget that is called a + * small integer and rewrite them during set_branchtargets(). 
+ */ + int sig_id; + + /** + * Pointer to first instruction of the function body. + * + * Set during function body emits after main() is processed. + */ + ir_to_mesa_instruction *bgn_inst; + + /** + * Index of the first instruction of the function body in actual + * Mesa IR. + * + * Set after convertion from ir_to_mesa_instruction to prog_instruction. + */ + int inst; + + /** Storage for the return value. */ + ir_to_mesa_src_reg return_reg; +}; + +class ir_to_mesa_visitor : public ir_visitor { +public: + ir_to_mesa_visitor(); + + function_entry *current_function; + + GLcontext *ctx; + struct gl_program *prog; + + int next_temp; + + variable_storage *find_variable_storage(ir_variable *var); + + function_entry *get_function_signature(ir_function_signature *sig); + + ir_to_mesa_src_reg get_temp(const glsl_type *type); + void reladdr_to_temp(ir_instruction *ir, + ir_to_mesa_src_reg *reg, int *num_reladdr); + + struct ir_to_mesa_src_reg src_reg_for_float(float val); + + /** + * \name Visit methods + * + * As typical for the visitor pattern, there must be one \c visit method for + * each concrete subclass of \c ir_instruction. Virtual base classes within + * the hierarchy should not have \c visit methods. + */ + /*@{*/ + virtual void visit(ir_variable *); + virtual void visit(ir_loop *); + virtual void visit(ir_loop_jump *); + virtual void visit(ir_function_signature *); + virtual void visit(ir_function *); + virtual void visit(ir_expression *); + virtual void visit(ir_swizzle *); + virtual void visit(ir_dereference_variable *); + virtual void visit(ir_dereference_array *); + virtual void visit(ir_dereference_record *); + virtual void visit(ir_assignment *); + virtual void visit(ir_constant *); + virtual void visit(ir_call *); + virtual void visit(ir_return *); + virtual void visit(ir_discard *); + virtual void visit(ir_texture *); + virtual void visit(ir_if *); + /*@}*/ + + struct ir_to_mesa_src_reg result; + + /** List of variable_storage */ + exec_list variables; + + /** List of function_entry */ + exec_list function_signatures; + int next_signature_id; + + /** List of ir_to_mesa_instruction */ + exec_list instructions; + + ir_to_mesa_instruction *ir_to_mesa_emit_op0(ir_instruction *ir, + enum prog_opcode op); + + ir_to_mesa_instruction *ir_to_mesa_emit_op1(ir_instruction *ir, + enum prog_opcode op, + ir_to_mesa_dst_reg dst, + ir_to_mesa_src_reg src0); + + ir_to_mesa_instruction *ir_to_mesa_emit_op2(ir_instruction *ir, + enum prog_opcode op, + ir_to_mesa_dst_reg dst, + ir_to_mesa_src_reg src0, + ir_to_mesa_src_reg src1); + + ir_to_mesa_instruction *ir_to_mesa_emit_op3(ir_instruction *ir, + enum prog_opcode op, + ir_to_mesa_dst_reg dst, + ir_to_mesa_src_reg src0, + ir_to_mesa_src_reg src1, + ir_to_mesa_src_reg src2); + + void ir_to_mesa_emit_scalar_op1(ir_instruction *ir, + enum prog_opcode op, + ir_to_mesa_dst_reg dst, + ir_to_mesa_src_reg src0); + + void ir_to_mesa_emit_scalar_op2(ir_instruction *ir, + enum prog_opcode op, + ir_to_mesa_dst_reg dst, + ir_to_mesa_src_reg src0, + ir_to_mesa_src_reg src1); + + GLboolean try_emit_mad(ir_expression *ir, + int mul_operand); + + int *sampler_map; + int sampler_map_size; + + void map_sampler(int location, int sampler); + int get_sampler_number(int location); + + void *mem_ctx; +}; + +ir_to_mesa_src_reg ir_to_mesa_undef = { + PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, NEGATE_NONE, NULL, +}; + +ir_to_mesa_dst_reg ir_to_mesa_undef_dst = { + PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, COND_TR, NULL, +}; + +ir_to_mesa_dst_reg ir_to_mesa_address_reg = { + 
PROGRAM_ADDRESS, 0, WRITEMASK_X, COND_TR, NULL +}; + +static int swizzle_for_size(int size) +{ + int size_swizzles[4] = { + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), + }; + + return size_swizzles[size - 1]; +} + +ir_to_mesa_instruction * +ir_to_mesa_visitor::ir_to_mesa_emit_op3(ir_instruction *ir, + enum prog_opcode op, + ir_to_mesa_dst_reg dst, + ir_to_mesa_src_reg src0, + ir_to_mesa_src_reg src1, + ir_to_mesa_src_reg src2) +{ + ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction(); + int num_reladdr = 0; + + /* If we have to do relative addressing, we want to load the ARL + * reg directly for one of the regs, and preload the other reladdr + * sources into temps. + */ + num_reladdr += dst.reladdr != NULL; + num_reladdr += src0.reladdr != NULL; + num_reladdr += src1.reladdr != NULL; + num_reladdr += src2.reladdr != NULL; + + reladdr_to_temp(ir, &src2, &num_reladdr); + reladdr_to_temp(ir, &src1, &num_reladdr); + reladdr_to_temp(ir, &src0, &num_reladdr); + + if (dst.reladdr) { + ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg, + *dst.reladdr); + + num_reladdr--; + } + assert(num_reladdr == 0); + + inst->op = op; + inst->dst_reg = dst; + inst->src_reg[0] = src0; + inst->src_reg[1] = src1; + inst->src_reg[2] = src2; + inst->ir = ir; + + inst->function = NULL; + + this->instructions.push_tail(inst); + + return inst; +} + + +ir_to_mesa_instruction * +ir_to_mesa_visitor::ir_to_mesa_emit_op2(ir_instruction *ir, + enum prog_opcode op, + ir_to_mesa_dst_reg dst, + ir_to_mesa_src_reg src0, + ir_to_mesa_src_reg src1) +{ + return ir_to_mesa_emit_op3(ir, op, dst, src0, src1, ir_to_mesa_undef); +} + +ir_to_mesa_instruction * +ir_to_mesa_visitor::ir_to_mesa_emit_op1(ir_instruction *ir, + enum prog_opcode op, + ir_to_mesa_dst_reg dst, + ir_to_mesa_src_reg src0) +{ + return ir_to_mesa_emit_op3(ir, op, dst, + src0, ir_to_mesa_undef, ir_to_mesa_undef); +} + +ir_to_mesa_instruction * +ir_to_mesa_visitor::ir_to_mesa_emit_op0(ir_instruction *ir, + enum prog_opcode op) +{ + return ir_to_mesa_emit_op3(ir, op, ir_to_mesa_undef_dst, + ir_to_mesa_undef, + ir_to_mesa_undef, + ir_to_mesa_undef); +} + +void +ir_to_mesa_visitor::map_sampler(int location, int sampler) +{ + if (this->sampler_map_size <= location) { + this->sampler_map = talloc_realloc(this->mem_ctx, this->sampler_map, + int, location + 1); + this->sampler_map_size = location + 1; + } + + this->sampler_map[location] = sampler; +} + +int +ir_to_mesa_visitor::get_sampler_number(int location) +{ + assert(location < this->sampler_map_size); + return this->sampler_map[location]; +} + +inline ir_to_mesa_dst_reg +ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg) +{ + ir_to_mesa_dst_reg dst_reg; + + dst_reg.file = reg.file; + dst_reg.index = reg.index; + dst_reg.writemask = WRITEMASK_XYZW; + dst_reg.cond_mask = COND_TR; + dst_reg.reladdr = reg.reladdr; + + return dst_reg; +} + +inline ir_to_mesa_src_reg +ir_to_mesa_src_reg_from_dst(ir_to_mesa_dst_reg reg) +{ + ir_to_mesa_src_reg src_reg; + + src_reg.file = reg.file; + src_reg.index = reg.index; + src_reg.swizzle = SWIZZLE_XYZW; + src_reg.negate = 0; + src_reg.reladdr = reg.reladdr; + + return src_reg; +} + +/** + * Emits Mesa scalar opcodes to produce unique answers across channels. + * + * Some Mesa opcodes are scalar-only, like ARB_fp/vp. 
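swizzle_for_size() above builds the packed swizzle for a scalar/vec2/vec3/vec4 result by replicating the last real channel. Below is a standalone sketch of the same idea; the MAKE_SWIZZLE4/GET_SWZ packing is re-declared locally (3 bits per component, in the spirit of Mesa's macros) so the example compiles on its own.

/* Illustrative sketch, not Mesa code: packed swizzles that replicate the
 * last channel of a result narrower than vec4. */
#include <stdio.h>

enum { SWZ_X = 0, SWZ_Y = 1, SWZ_Z = 2, SWZ_W = 3 };

#define MAKE_SWIZZLE4(a, b, c, d) ((a) | ((b) << 3) | ((c) << 6) | ((d) << 9))
#define GET_SWZ(swz, idx)         (((swz) >> ((idx) * 3)) & 0x7)

static unsigned swizzle_for_size(int size)
{
   static const unsigned size_swizzles[4] = {
      MAKE_SWIZZLE4(SWZ_X, SWZ_X, SWZ_X, SWZ_X),   /* float:  .xxxx */
      MAKE_SWIZZLE4(SWZ_X, SWZ_Y, SWZ_Y, SWZ_Y),   /* vec2:   .xyyy */
      MAKE_SWIZZLE4(SWZ_X, SWZ_Y, SWZ_Z, SWZ_Z),   /* vec3:   .xyzz */
      MAKE_SWIZZLE4(SWZ_X, SWZ_Y, SWZ_Z, SWZ_W),   /* vec4:   .xyzw */
   };
   return size_swizzles[size - 1];
}

int main(void)
{
   static const char comp[4] = { 'x', 'y', 'z', 'w' };
   int size, i;

   for (size = 1; size <= 4; size++) {
      unsigned swz = swizzle_for_size(size);
      printf("size %d -> .", size);
      for (i = 0; i < 4; i++)
         putchar(comp[GET_SWZ(swz, i)]);
      putchar('\n');
   }
   return 0;
}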
The src X + * channel determines the result across all channels. So to do a vec4 + * of this operation, we want to emit a scalar per source channel used + * to produce dest channels. + */ +void +ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op2(ir_instruction *ir, + enum prog_opcode op, + ir_to_mesa_dst_reg dst, + ir_to_mesa_src_reg orig_src0, + ir_to_mesa_src_reg orig_src1) +{ + int i, j; + int done_mask = ~dst.writemask; + + /* Mesa RCP is a scalar operation splatting results to all channels, + * like ARB_fp/vp. So emit as many RCPs as necessary to cover our + * dst channels. + */ + for (i = 0; i < 4; i++) { + GLuint this_mask = (1 << i); + ir_to_mesa_instruction *inst; + ir_to_mesa_src_reg src0 = orig_src0; + ir_to_mesa_src_reg src1 = orig_src1; + + if (done_mask & this_mask) + continue; + + GLuint src0_swiz = GET_SWZ(src0.swizzle, i); + GLuint src1_swiz = GET_SWZ(src1.swizzle, i); + for (j = i + 1; j < 4; j++) { + if (!(done_mask & (1 << j)) && + GET_SWZ(src0.swizzle, j) == src0_swiz && + GET_SWZ(src1.swizzle, j) == src1_swiz) { + this_mask |= (1 << j); + } + } + src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, + src0_swiz, src0_swiz); + src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, + src1_swiz, src1_swiz); + + inst = ir_to_mesa_emit_op2(ir, op, + dst, + src0, + src1); + inst->dst_reg.writemask = this_mask; + done_mask |= this_mask; + } +} + +void +ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir, + enum prog_opcode op, + ir_to_mesa_dst_reg dst, + ir_to_mesa_src_reg src0) +{ + ir_to_mesa_src_reg undef = ir_to_mesa_undef; + + undef.swizzle = SWIZZLE_XXXX; + + ir_to_mesa_emit_scalar_op2(ir, op, dst, src0, undef); +} + +struct ir_to_mesa_src_reg +ir_to_mesa_visitor::src_reg_for_float(float val) +{ + ir_to_mesa_src_reg src_reg; + + src_reg.file = PROGRAM_CONSTANT; + src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters, + &val, 1, &src_reg.swizzle); + src_reg.reladdr = NULL; + src_reg.negate = 0; + + return src_reg; +} + +static int +type_size(const struct glsl_type *type) +{ + unsigned int i; + int size; + + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (type->is_matrix()) { + return type->matrix_columns; + } else { + /* Regardless of size of vector, it gets a vec4. This is bad + * packing for things like floats, but otherwise arrays become a + * mess. Hopefully a later pass over the code can pack scalars + * down if appropriate. + */ + return 1; + } + case GLSL_TYPE_ARRAY: + return type_size(type->fields.array) * type->length; + case GLSL_TYPE_STRUCT: + size = 0; + for (i = 0; i < type->length; i++) { + size += type_size(type->fields.structure[i].type); + } + return size; + default: + assert(0); + } +} + +/** + * In the initial pass of codegen, we assign temporary numbers to + * intermediate results. (not SSA -- variable assignments will reuse + * storage). Actual register allocation for the Mesa VM occurs in a + * pass over the Mesa IR later. 
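type_size() above counts storage in vec4 slots: any scalar or vector takes one slot, a matrix takes one slot per column, and arrays and structs accumulate recursively. Below is a toy version of that rule; the toy_type encoding is invented purely for the example.

/* Illustrative sketch, not Mesa code: counting vec4 register slots the way
 * type_size() does. */
#include <stdio.h>

struct toy_type {
   int matrix_columns;              /* 0 for non-matrix types */
   int array_length;                /* 0 for non-array types */
   const struct toy_type *element;  /* element type when array_length > 0 */
};

static int type_slots(const struct toy_type *t)
{
   if (t->array_length > 0)
      return t->array_length * type_slots(t->element);
   if (t->matrix_columns > 0)
      return t->matrix_columns;     /* one vec4 slot per column */
   return 1;                        /* float..vec4 all occupy one slot */
}

int main(void)
{
   static const struct toy_type vec3   = { 0, 0, NULL };
   static const struct toy_type mat4   = { 4, 0, NULL };
   static const struct toy_type vec3_8 = { 0, 8, &vec3 };   /* vec3[8] */

   printf("vec3    -> %d slot(s)\n", type_slots(&vec3));
   printf("mat4    -> %d slot(s)\n", type_slots(&mat4));
   printf("vec3[8] -> %d slot(s)\n", type_slots(&vec3_8));
   return 0;
}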
+ */ +ir_to_mesa_src_reg +ir_to_mesa_visitor::get_temp(const glsl_type *type) +{ + ir_to_mesa_src_reg src_reg; + int swizzle[4]; + int i; + + assert(!type->is_array()); + + src_reg.file = PROGRAM_TEMPORARY; + src_reg.index = next_temp; + src_reg.reladdr = NULL; + next_temp += type_size(type); + + for (i = 0; i < type->vector_elements; i++) + swizzle[i] = i; + for (; i < 4; i++) + swizzle[i] = type->vector_elements - 1; + src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], + swizzle[2], swizzle[3]); + src_reg.negate = 0; + + return src_reg; +} + +variable_storage * +ir_to_mesa_visitor::find_variable_storage(ir_variable *var) +{ + + variable_storage *entry; + + foreach_iter(exec_list_iterator, iter, this->variables) { + entry = (variable_storage *)iter.get(); + + if (entry->var == var) + return entry; + } + + return NULL; +} + +void +ir_to_mesa_visitor::visit(ir_variable *ir) +{ + (void)ir; +} + +void +ir_to_mesa_visitor::visit(ir_loop *ir) +{ + assert(!ir->from); + assert(!ir->to); + assert(!ir->increment); + assert(!ir->counter); + + ir_to_mesa_emit_op0(NULL, OPCODE_BGNLOOP); + visit_exec_list(&ir->body_instructions, this); + ir_to_mesa_emit_op0(NULL, OPCODE_ENDLOOP); +} + +void +ir_to_mesa_visitor::visit(ir_loop_jump *ir) +{ + switch (ir->mode) { + case ir_loop_jump::jump_break: + ir_to_mesa_emit_op0(NULL, OPCODE_BRK); + break; + case ir_loop_jump::jump_continue: + ir_to_mesa_emit_op0(NULL, OPCODE_CONT); + break; + } +} + + +void +ir_to_mesa_visitor::visit(ir_function_signature *ir) +{ + assert(0); + (void)ir; +} + +void +ir_to_mesa_visitor::visit(ir_function *ir) +{ + /* Ignore function bodies other than main() -- we shouldn't see calls to + * them since they should all be inlined before we get to ir_to_mesa. + */ + if (strcmp(ir->name, "main") == 0) { + const ir_function_signature *sig; + exec_list empty; + + sig = ir->matching_signature(&empty); + + assert(sig); + + foreach_iter(exec_list_iterator, iter, sig->body) { + ir_instruction *ir = (ir_instruction *)iter.get(); + + ir->accept(this); + } + } +} + +GLboolean +ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand) +{ + int nonmul_operand = 1 - mul_operand; + ir_to_mesa_src_reg a, b, c; + + ir_expression *expr = ir->operands[mul_operand]->as_expression(); + if (!expr || expr->operation != ir_binop_mul) + return false; + + expr->operands[0]->accept(this); + a = this->result; + expr->operands[1]->accept(this); + b = this->result; + ir->operands[nonmul_operand]->accept(this); + c = this->result; + + this->result = get_temp(ir->type); + ir_to_mesa_emit_op3(ir, OPCODE_MAD, + ir_to_mesa_dst_reg_from_src(this->result), a, b, c); + + return true; +} + +void +ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir, + ir_to_mesa_src_reg *reg, int *num_reladdr) +{ + if (!reg->reladdr) + return; + + ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg, *reg->reladdr); + + if (*num_reladdr != 1) { + ir_to_mesa_src_reg temp = get_temp(glsl_type::vec4_type); + + ir_to_mesa_emit_op1(ir, OPCODE_MOV, + ir_to_mesa_dst_reg_from_src(temp), *reg); + *reg = temp; + } + + (*num_reladdr)--; +} + +void +ir_to_mesa_visitor::visit(ir_expression *ir) +{ + unsigned int operand; + struct ir_to_mesa_src_reg op[2]; + struct ir_to_mesa_src_reg result_src; + struct ir_to_mesa_dst_reg result_dst; + const glsl_type *vec4_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 4, 1); + const glsl_type *vec3_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 3, 1); + const glsl_type *vec2_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 2, 1); + + /* Quick 
peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c) + */ + if (ir->operation == ir_binop_add) { + if (try_emit_mad(ir, 1)) + return; + if (try_emit_mad(ir, 0)) + return; + } + + for (operand = 0; operand < ir->get_num_operands(); operand++) { + this->result.file = PROGRAM_UNDEFINED; + ir->operands[operand]->accept(this); + if (this->result.file == PROGRAM_UNDEFINED) { + ir_print_visitor v; + printf("Failed to get tree for expression operand:\n"); + ir->operands[operand]->accept(&v); + exit(1); + } + op[operand] = this->result; + + /* Matrix expression operands should have been broken down to vector + * operations already. + */ + assert(!ir->operands[operand]->type->is_matrix()); + } + + this->result.file = PROGRAM_UNDEFINED; + + /* Storage for our result. Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = get_temp(ir->type); + /* convenience for the emit functions below. */ + result_dst = ir_to_mesa_dst_reg_from_src(result_src); + /* Limit writes to the channels that will be used by result_src later. + * This does limit this temp's use as a temporary for multi-instruction + * sequences. + */ + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + + switch (ir->operation) { + case ir_unop_logic_not: + ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst, + op[0], src_reg_for_float(0.0)); + break; + case ir_unop_neg: + op[0].negate = ~op[0].negate; + result_src = op[0]; + break; + case ir_unop_abs: + ir_to_mesa_emit_op1(ir, OPCODE_ABS, result_dst, op[0]); + break; + case ir_unop_sign: + ir_to_mesa_emit_op1(ir, OPCODE_SSG, result_dst, op[0]); + break; + case ir_unop_rcp: + ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, op[0]); + break; + + case ir_unop_exp: + ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst, + src_reg_for_float(M_E), op[0]); + break; + case ir_unop_exp2: + ir_to_mesa_emit_scalar_op1(ir, OPCODE_EX2, result_dst, op[0]); + break; + case ir_unop_log: + ir_to_mesa_emit_scalar_op1(ir, OPCODE_LOG, result_dst, op[0]); + break; + case ir_unop_log2: + ir_to_mesa_emit_scalar_op1(ir, OPCODE_LG2, result_dst, op[0]); + break; + case ir_unop_sin: + ir_to_mesa_emit_scalar_op1(ir, OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos: + ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]); + break; + + case ir_unop_dFdx: + ir_to_mesa_emit_op1(ir, OPCODE_DDX, result_dst, op[0]); + break; + case ir_unop_dFdy: + ir_to_mesa_emit_op1(ir, OPCODE_DDY, result_dst, op[0]); + break; + + case ir_binop_add: + ir_to_mesa_emit_op2(ir, OPCODE_ADD, result_dst, op[0], op[1]); + break; + case ir_binop_sub: + ir_to_mesa_emit_op2(ir, OPCODE_SUB, result_dst, op[0], op[1]); + break; + + case ir_binop_mul: + ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], op[1]); + break; + case ir_binop_div: + assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + case ir_binop_mod: + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + break; + + case ir_binop_less: + ir_to_mesa_emit_op2(ir, OPCODE_SLT, result_dst, op[0], op[1]); + break; + case ir_binop_greater: + ir_to_mesa_emit_op2(ir, OPCODE_SGT, result_dst, op[0], op[1]); + break; + case ir_binop_lequal: + ir_to_mesa_emit_op2(ir, OPCODE_SLE, result_dst, op[0], op[1]); + break; + case ir_binop_gequal: + ir_to_mesa_emit_op2(ir, OPCODE_SGE, result_dst, op[0], op[1]); + break; + case ir_binop_equal: + ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst, op[0], op[1]); + break; + case ir_binop_logic_xor: + case ir_binop_nequal: + 
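try_emit_mad() (defined just above) implements the peephole described at the top of this hunk: an ADD whose left or right operand is a MUL becomes a single MAD. Below is a toy sketch of the shape of that check; the expr node type and the leaf names are invented, and the visitor's register plumbing is left out.

/* Illustrative sketch, not Mesa code: ADD(MUL(a,b), c) -> MAD(a,b,c),
 * trying both operands of the ADD so b*c + a is caught as well. */
#include <stdio.h>

enum op { OP_ADD, OP_MUL, OP_LEAF };

struct expr {
   enum op op;
   const char *name;                /* for leaves */
   const struct expr *child[2];     /* for OP_ADD / OP_MUL */
};

/* Returns 1 and prints a MAD if operand `mul_operand` of the ADD is a MUL. */
static int try_emit_mad(const struct expr *add, int mul_operand)
{
   const struct expr *mul = add->child[mul_operand];
   const struct expr *other = add->child[1 - mul_operand];

   if (mul->op != OP_MUL)
      return 0;

   printf("MAD %s, %s, %s\n",
          mul->child[0]->name, mul->child[1]->name, other->name);
   return 1;
}

int main(void)
{
   static const struct expr a   = { OP_LEAF, "a", { NULL, NULL } };
   static const struct expr b   = { OP_LEAF, "b", { NULL, NULL } };
   static const struct expr c   = { OP_LEAF, "c", { NULL, NULL } };
   static const struct expr mul = { OP_MUL, NULL, { &a, &b } };
   static const struct expr add = { OP_ADD, NULL, { &c, &mul } };  /* c + a*b */

   /* Check operand 1 first, then operand 0, as the visitor above does. */
   if (!try_emit_mad(&add, 1) && !try_emit_mad(&add, 0))
      printf("ADD without MUL operand: emit plain ADD\n");
   return 0;
}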
ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_or: + /* This could be a saturated add and skip the SNE. */ + ir_to_mesa_emit_op2(ir, OPCODE_ADD, + result_dst, + op[0], op[1]); + + ir_to_mesa_emit_op2(ir, OPCODE_SNE, + result_dst, + result_src, src_reg_for_float(0.0)); + break; + + case ir_binop_logic_and: + /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ + ir_to_mesa_emit_op2(ir, OPCODE_MUL, + result_dst, + op[0], op[1]); + break; + + case ir_binop_dot: + if (ir->operands[0]->type == vec4_type) { + assert(ir->operands[1]->type == vec4_type); + ir_to_mesa_emit_op2(ir, OPCODE_DP4, + result_dst, + op[0], op[1]); + } else if (ir->operands[0]->type == vec3_type) { + assert(ir->operands[1]->type == vec3_type); + ir_to_mesa_emit_op2(ir, OPCODE_DP3, + result_dst, + op[0], op[1]); + } else if (ir->operands[0]->type == vec2_type) { + assert(ir->operands[1]->type == vec2_type); + ir_to_mesa_emit_op2(ir, OPCODE_DP2, + result_dst, + op[0], op[1]); + } + break; + + case ir_binop_cross: + ir_to_mesa_emit_op2(ir, OPCODE_XPD, result_dst, op[0], op[1]); + break; + + case ir_unop_sqrt: + ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]); + ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, result_src); + /* For incoming channels < 0, set the result to 0. */ + ir_to_mesa_emit_op3(ir, OPCODE_CMP, result_dst, + op[0], src_reg_for_float(0.0), result_src); + break; + case ir_unop_rsq: + ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]); + break; + case ir_unop_i2f: + case ir_unop_b2f: + case ir_unop_b2i: + /* Mesa IR lacks types, ints are stored as truncated floats. */ + result_src = op[0]; + break; + case ir_unop_f2i: + ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]); + break; + case ir_unop_f2b: + case ir_unop_i2b: + ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst, + result_src, src_reg_for_float(0.0)); + break; + case ir_unop_trunc: + ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]); + break; + case ir_unop_ceil: + op[0].negate = ~op[0].negate; + ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]); + result_src.negate = ~result_src.negate; + break; + case ir_unop_floor: + ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]); + break; + case ir_unop_fract: + ir_to_mesa_emit_op1(ir, OPCODE_FRC, result_dst, op[0]); + break; + + case ir_binop_min: + ir_to_mesa_emit_op2(ir, OPCODE_MIN, result_dst, op[0], op[1]); + break; + case ir_binop_max: + ir_to_mesa_emit_op2(ir, OPCODE_MAX, result_dst, op[0], op[1]); + break; + case ir_binop_pow: + ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst, op[0], op[1]); + break; + + case ir_unop_bit_not: + case ir_unop_u2f: + case ir_binop_lshift: + case ir_binop_rshift: + case ir_binop_bit_and: + case ir_binop_bit_xor: + case ir_binop_bit_or: + assert(!"GLSL 1.30 features unsupported"); + break; + } + + this->result = result_src; +} + + +void +ir_to_mesa_visitor::visit(ir_swizzle *ir) +{ + ir_to_mesa_src_reg src_reg; + int i; + int swizzle[4]; + + /* Note that this is only swizzles in expressions, not those on the left + * hand side of an assignment, which do write masking. See ir_assignment + * for that. 
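The boolean cases above work because Mesa IR stores bools as the floats 0.0 and 1.0, so NOT, AND, OR and XOR reduce to SEQ, MUL, ADD-then-SNE, and SNE. Below is a small truth-table demo of those identities; seq() and sne() just model the 0.0/1.0 results of the corresponding opcodes.

/* Illustrative sketch, not Mesa code: boolean operators as float math,
 * given bools stored as 0.0 / 1.0. */
#include <stdio.h>

static float seq(float a, float b) { return a == b ? 1.0f : 0.0f; } /* OPCODE_SEQ */
static float sne(float a, float b) { return a != b ? 1.0f : 0.0f; } /* OPCODE_SNE */

int main(void)
{
   float a, b;

   for (a = 0.0f; a <= 1.0f; a += 1.0f) {
      for (b = 0.0f; b <= 1.0f; b += 1.0f) {
         float not_a   = seq(a, 0.0f);       /* ir_unop_logic_not  */
         float a_and_b = a * b;              /* ir_binop_logic_and */
         float a_or_b  = sne(a + b, 0.0f);   /* ir_binop_logic_or  */
         float a_xor_b = sne(a, b);          /* ir_binop_logic_xor */
         printf("a=%.0f b=%.0f  !a=%.0f  a&&b=%.0f  a||b=%.0f  a^^b=%.0f\n",
                a, b, not_a, a_and_b, a_or_b, a_xor_b);
      }
   }
   return 0;
}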
+ */ + + ir->val->accept(this); + src_reg = this->result; + assert(src_reg.file != PROGRAM_UNDEFINED); + + for (i = 0; i < 4; i++) { + if (i < ir->type->vector_elements) { + switch (i) { + case 0: + swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.x); + break; + case 1: + swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.y); + break; + case 2: + swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.z); + break; + case 3: + swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.w); + break; + } + } else { + /* If the type is smaller than a vec4, replicate the last + * channel out. + */ + swizzle[i] = swizzle[ir->type->vector_elements - 1]; + } + } + + src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0], + swizzle[1], + swizzle[2], + swizzle[3]); + + this->result = src_reg; +} + +static int +add_matrix_ref(struct gl_program *prog, int *tokens) +{ + int base_pos = -1; + int i; + + /* Add a ref for each column. It looks like the reason we do + * it this way is that _mesa_add_state_reference doesn't work + * for things that aren't vec4s, so the tokens[2]/tokens[3] + * range has to be equal. + */ + for (i = 0; i < 4; i++) { + tokens[2] = i; + tokens[3] = i; + int pos = _mesa_add_state_reference(prog->Parameters, + (gl_state_index *)tokens); + if (base_pos == -1) + base_pos = pos; + else + assert(base_pos + i == pos); + } + + return base_pos; +} + +static variable_storage * +get_builtin_matrix_ref(void *mem_ctx, struct gl_program *prog, ir_variable *var, + ir_rvalue *array_index) +{ + /* + * NOTE: The ARB_vertex_program extension specified that matrices get + * loaded in registers in row-major order. With GLSL, we want column- + * major order. So, we need to transpose all matrices here... + */ + static const struct { + const char *name; + int matrix; + int modifier; + } matrices[] = { + { "gl_ModelViewMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE }, + { "gl_ModelViewMatrixInverse", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS }, + { "gl_ModelViewMatrixTranspose", STATE_MODELVIEW_MATRIX, 0 }, + { "gl_ModelViewMatrixInverseTranspose", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE }, + + { "gl_ProjectionMatrix", STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE }, + { "gl_ProjectionMatrixInverse", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS }, + { "gl_ProjectionMatrixTranspose", STATE_PROJECTION_MATRIX, 0 }, + { "gl_ProjectionMatrixInverseTranspose", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE }, + + { "gl_ModelViewProjectionMatrix", STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE }, + { "gl_ModelViewProjectionMatrixInverse", STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS }, + { "gl_ModelViewProjectionMatrixTranspose", STATE_MVP_MATRIX, 0 }, + { "gl_ModelViewProjectionMatrixInverseTranspose", STATE_MVP_MATRIX, STATE_MATRIX_INVERSE }, + + { "gl_TextureMatrix", STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE }, + { "gl_TextureMatrixInverse", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS }, + { "gl_TextureMatrixTranspose", STATE_TEXTURE_MATRIX, 0 }, + { "gl_TextureMatrixInverseTranspose", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE }, + + { "gl_NormalMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE }, + + }; + unsigned int i; + variable_storage *entry; + + /* C++ gets angry when we try to use an int as a gl_state_index, so we use + * ints for gl_state_index. Make sure they're compatible. 
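The ir_swizzle visitor above does not emit an instruction; it composes the expression's swizzle with whatever swizzle the source register already carries, and replicates the last channel for results narrower than vec4. Below is a standalone sketch of that composition, with the packing macros re-declared locally so the example stands alone.

/* Illustrative sketch, not Mesa code: composing a new swizzle through the
 * swizzle already attached to the source register. */
#include <stdio.h>

#define MAKE_SWIZZLE4(a, b, c, d) ((a) | ((b) << 3) | ((c) << 6) | ((d) << 9))
#define GET_SWZ(swz, idx)         (((swz) >> ((idx) * 3)) & 0x7)

static unsigned compose(unsigned inner, const int outer[4], int components)
{
   int out[4];
   int i;

   for (i = 0; i < 4; i++) {
      if (i < components)
         out[i] = GET_SWZ(inner, outer[i]);   /* look through the source */
      else
         out[i] = out[components - 1];        /* replicate the last channel */
   }
   return MAKE_SWIZZLE4(out[0], out[1], out[2], out[3]);
}

int main(void)
{
   static const char comp[4] = { 'x', 'y', 'z', 'w' };
   /* Source already swizzled .wzyx; now apply a 2-component .yx to it. */
   unsigned inner = MAKE_SWIZZLE4(3, 2, 1, 0);
   int outer[4] = { 1, 0, 0, 0 };
   unsigned result = compose(inner, outer, 2);
   int i;

   printf("(.wzyx).yx = .");
   for (i = 0; i < 4; i++)
      putchar(comp[GET_SWZ(result, i)]);
   putchar('\n');                              /* prints .zwww */
   return 0;
}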
+ */ + assert(sizeof(gl_state_index) == sizeof(int)); + + for (i = 0; i < Elements(matrices); i++) { + if (strcmp(var->name, matrices[i].name) == 0) { + int tokens[STATE_LENGTH]; + int base_pos = -1; + + tokens[0] = matrices[i].matrix; + tokens[4] = matrices[i].modifier; + if (matrices[i].matrix == STATE_TEXTURE_MATRIX) { + ir_constant *index = array_index->constant_expression_value(); + if (index) { + tokens[1] = index->value.i[0]; + base_pos = add_matrix_ref(prog, tokens); + } else { + for (i = 0; i < var->type->length; i++) { + tokens[1] = i; + int pos = add_matrix_ref(prog, tokens); + if (base_pos == -1) + base_pos = pos; + else + assert(base_pos + (int)i * 4 == pos); + } + } + } else { + tokens[1] = 0; /* unused array index */ + base_pos = add_matrix_ref(prog, tokens); + } + tokens[4] = matrices[i].modifier; + + entry = new(mem_ctx) variable_storage(var, + PROGRAM_STATE_VAR, + base_pos); + + return entry; + } + } + + return NULL; +} + +void +ir_to_mesa_visitor::visit(ir_dereference_variable *ir) +{ + ir_to_mesa_src_reg src_reg; + variable_storage *entry = find_variable_storage(ir->var); + unsigned int loc; + + if (!entry) { + switch (ir->var->mode) { + case ir_var_uniform: + entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, ir->var, + NULL); + if (entry) + break; + + /* FINISHME: Fix up uniform name for arrays and things */ + if (ir->var->type->base_type == GLSL_TYPE_SAMPLER) { + /* FINISHME: we whack the location of the var here, which + * is probably not expected. But we need to communicate + * mesa's sampler number to the tex instruction. + */ + int sampler = _mesa_add_sampler(this->prog->Parameters, + ir->var->name, + ir->var->type->gl_type); + map_sampler(ir->var->location, sampler); + + entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_SAMPLER, + sampler); + this->variables.push_tail(entry); + break; + } + + assert(ir->var->type->gl_type != 0 && + ir->var->type->gl_type != GL_INVALID_ENUM); + loc = _mesa_add_uniform(this->prog->Parameters, + ir->var->name, + type_size(ir->var->type) * 4, + ir->var->type->gl_type, + NULL); + + /* Always mark the uniform used at this point. If it isn't + * used, dead code elimination should have nuked the decl already. + */ + this->prog->Parameters->Parameters[loc].Used = GL_TRUE; + + entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_UNIFORM, loc); + this->variables.push_tail(entry); + break; + case ir_var_in: + case ir_var_out: + case ir_var_inout: + /* The linker assigns locations for varyings and attributes, + * including deprecated builtins (like gl_Color), user-assign + * generic attributes (glBindVertexLocation), and + * user-defined varyings. + * + * FINISHME: We would hit this path for function arguments. Fix! 
+ */ + assert(ir->var->location != -1); + if (ir->var->mode == ir_var_in || + ir->var->mode == ir_var_inout) { + entry = new(mem_ctx) variable_storage(ir->var, + PROGRAM_INPUT, + ir->var->location); + + if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && + ir->var->location >= VERT_ATTRIB_GENERIC0) { + _mesa_add_attribute(prog->Attributes, + ir->var->name, + type_size(ir->var->type) * 4, + ir->var->type->gl_type, + ir->var->location - VERT_ATTRIB_GENERIC0); + } + } else { + entry = new(mem_ctx) variable_storage(ir->var, + PROGRAM_OUTPUT, + ir->var->location); + } + + break; + case ir_var_auto: + case ir_var_temporary: + entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_TEMPORARY, + this->next_temp); + this->variables.push_tail(entry); + + next_temp += type_size(ir->var->type); + break; + } + + if (!entry) { + printf("Failed to make storage for %s\n", ir->var->name); + exit(1); + } + } + + src_reg.file = entry->file; + src_reg.index = entry->index; + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + src_reg.swizzle = swizzle_for_size(ir->var->type->vector_elements); + else + src_reg.swizzle = SWIZZLE_NOOP; + src_reg.reladdr = NULL; + src_reg.negate = 0; + + this->result = src_reg; +} + +void +ir_to_mesa_visitor::visit(ir_dereference_array *ir) +{ + ir_constant *index; + ir_to_mesa_src_reg src_reg; + ir_dereference_variable *deref_var = ir->array->as_dereference_variable(); + int element_size = type_size(ir->type); + + index = ir->array_index->constant_expression_value(); + + if (deref_var && strncmp(deref_var->var->name, + "gl_TextureMatrix", + strlen("gl_TextureMatrix")) == 0) { + ir_to_mesa_src_reg src_reg; + struct variable_storage *entry; + + entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, deref_var->var, + ir->array_index); + assert(entry); + + src_reg.file = entry->file; + src_reg.index = entry->index; + src_reg.swizzle = swizzle_for_size(ir->type->vector_elements); + src_reg.negate = 0; + + if (index) { + src_reg.reladdr = NULL; + } else { + ir_to_mesa_src_reg index_reg = get_temp(glsl_type::float_type); + + ir->array_index->accept(this); + ir_to_mesa_emit_op2(ir, OPCODE_MUL, + ir_to_mesa_dst_reg_from_src(index_reg), + this->result, src_reg_for_float(element_size)); + + src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg); + memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg)); + } + + this->result = src_reg; + return; + } + + ir->array->accept(this); + src_reg = this->result; + + if (index) { + src_reg.index += index->value.i[0] * element_size; + } else { + ir_to_mesa_src_reg array_base = this->result; + /* Variable index array dereference. It eats the "vec4" of the + * base of the array and an index that offsets the Mesa register + * index. + */ + ir->array_index->accept(this); + + ir_to_mesa_src_reg index_reg; + + if (element_size == 1) { + index_reg = this->result; + } else { + index_reg = get_temp(glsl_type::float_type); + + ir_to_mesa_emit_op2(ir, OPCODE_MUL, + ir_to_mesa_dst_reg_from_src(index_reg), + this->result, src_reg_for_float(element_size)); + } + + src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg); + memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg)); + } + + /* If the type is smaller than a vec4, replicate the last channel out. 
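The array-dereference visitor above folds a constant index straight into the register index, while a variable index is scaled by the element size (in vec4 slots) and applied through the address register (OPCODE_ARL plus RelAddr) at run time. Below is a sketch of the two addressing forms; reg_ref and the helper names are invented for the example.

/* Illustrative sketch, not Mesa code: constant vs. relative array
 * addressing into a block of vec4 registers. */
#include <stdio.h>

struct reg_ref {
   int base_index;        /* register index of the array's first element */
   int element_size;      /* vec4 slots per array element */
   int has_reladdr;       /* 1 if an address-register offset applies */
};

/* Constant index: resolved at compile time. */
static int resolve_constant(const struct reg_ref *r, int index)
{
   return r->base_index + index * r->element_size;
}

/* Variable index: the offset the ARL'd address register would hold. */
static int runtime_offset(const struct reg_ref *r, int index_value)
{
   return index_value * r->element_size;
}

int main(void)
{
   struct reg_ref arr = { 10, 4, 0 };          /* e.g. mat4 array at reg 10 */

   printf("arr[2] (constant) -> register %d\n", resolve_constant(&arr, 2));

   arr.has_reladdr = 1;
   printf("arr[i], i==3      -> register %d + ADDR offset %d\n",
          arr.base_index, runtime_offset(&arr, 3));
   return 0;
}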
*/ + if (ir->type->is_scalar() || ir->type->is_vector()) + src_reg.swizzle = swizzle_for_size(ir->type->vector_elements); + else + src_reg.swizzle = SWIZZLE_NOOP; + + this->result = src_reg; +} + +void +ir_to_mesa_visitor::visit(ir_dereference_record *ir) +{ + unsigned int i; + const glsl_type *struct_type = ir->record->type; + int offset = 0; + + ir->record->accept(this); + + for (i = 0; i < struct_type->length; i++) { + if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) + break; + offset += type_size(struct_type->fields.structure[i].type); + } + this->result.swizzle = swizzle_for_size(ir->type->vector_elements); + this->result.index += offset; +} + +/** + * We want to be careful in assignment setup to hit the actual storage + * instead of potentially using a temporary like we might with the + * ir_dereference handler. + * + * Thanks to ir_swizzle_swizzle, and ir_vec_index_to_swizzle, we + * should only see potentially one variable array index of a vector, + * and one swizzle, before getting to actual vec4 storage. So handle + * those, then go use ir_dereference to handle the rest. + */ +static struct ir_to_mesa_dst_reg +get_assignment_lhs(ir_instruction *ir, ir_to_mesa_visitor *v, + ir_to_mesa_src_reg *r) +{ + struct ir_to_mesa_dst_reg dst_reg; + ir_swizzle *swiz; + + ir_dereference_array *deref_array = ir->as_dereference_array(); + /* This should have been handled by ir_vec_index_to_cond_assign */ + if (deref_array) { + assert(!deref_array->array->type->is_vector()); + } + + /* Use the rvalue deref handler for the most part. We'll ignore + * swizzles in it and write swizzles using writemask, though. + */ + ir->accept(v); + dst_reg = ir_to_mesa_dst_reg_from_src(v->result); + + if ((swiz = ir->as_swizzle())) { + int swizzles[4] = { + swiz->mask.x, + swiz->mask.y, + swiz->mask.z, + swiz->mask.w + }; + int new_r_swizzle[4]; + int orig_r_swizzle = r->swizzle; + int i; + + for (i = 0; i < 4; i++) { + new_r_swizzle[i] = GET_SWZ(orig_r_swizzle, 0); + } + + dst_reg.writemask = 0; + for (i = 0; i < 4; i++) { + if (i < swiz->mask.num_components) { + dst_reg.writemask |= 1 << swizzles[i]; + new_r_swizzle[swizzles[i]] = GET_SWZ(orig_r_swizzle, i); + } + } + + r->swizzle = MAKE_SWIZZLE4(new_r_swizzle[0], + new_r_swizzle[1], + new_r_swizzle[2], + new_r_swizzle[3]); + } + + return dst_reg; +} + +void +ir_to_mesa_visitor::visit(ir_assignment *ir) +{ + struct ir_to_mesa_dst_reg l; + struct ir_to_mesa_src_reg r; + int i; + + assert(!ir->lhs->type->is_array()); + + ir->rhs->accept(this); + r = this->result; + + l = get_assignment_lhs(ir->lhs, this, &r); + + assert(l.file != PROGRAM_UNDEFINED); + assert(r.file != PROGRAM_UNDEFINED); + + if (ir->condition) { + ir_to_mesa_src_reg condition; + + ir->condition->accept(this); + condition = this->result; + + /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, + * and the condition we produced is 0.0 or 1.0. By flipping the + * sign, we can choose which value OPCODE_CMP produces without + * an extra computing the condition. 
+ */ + condition.negate = ~condition.negate; + for (i = 0; i < type_size(ir->lhs->type); i++) { + ir_to_mesa_emit_op3(ir, OPCODE_CMP, l, + condition, r, ir_to_mesa_src_reg_from_dst(l)); + l.index++; + r.index++; + } + } else { + for (i = 0; i < type_size(ir->lhs->type); i++) { + ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } +} + + +void +ir_to_mesa_visitor::visit(ir_constant *ir) +{ + ir_to_mesa_src_reg src_reg; + GLfloat stack_vals[4]; + GLfloat *values = stack_vals; + unsigned int i; + + if (ir->type->is_array()) { + ir->print(); + printf("\n"); + assert(!"FINISHME: array constants"); + } + + if (ir->type->is_matrix()) { + /* Unfortunately, 4 floats is all we can get into + * _mesa_add_unnamed_constant. So, make a temp to store the + * matrix and move each constant value into it. If we get + * lucky, copy propagation will eliminate the extra moves. + */ + ir_to_mesa_src_reg mat = get_temp(glsl_type::vec4_type); + ir_to_mesa_dst_reg mat_column = ir_to_mesa_dst_reg_from_src(mat); + + for (i = 0; i < ir->type->matrix_columns; i++) { + src_reg.file = PROGRAM_CONSTANT; + + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + values = &ir->value.f[i * ir->type->vector_elements]; + + src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters, + values, + ir->type->vector_elements, + &src_reg.swizzle); + src_reg.reladdr = NULL; + src_reg.negate = 0; + ir_to_mesa_emit_op1(ir, OPCODE_MOV, mat_column, src_reg); + + mat_column.index++; + } + + this->result = mat; + } + + src_reg.file = PROGRAM_CONSTANT; + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + values = &ir->value.f[0]; + break; + case GLSL_TYPE_UINT: + for (i = 0; i < ir->type->vector_elements; i++) { + values[i] = ir->value.u[i]; + } + break; + case GLSL_TYPE_INT: + for (i = 0; i < ir->type->vector_elements; i++) { + values[i] = ir->value.i[i]; + } + break; + case GLSL_TYPE_BOOL: + for (i = 0; i < ir->type->vector_elements; i++) { + values[i] = ir->value.b[i]; + } + break; + default: + assert(!"Non-float/uint/int/bool constant"); + } + + src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters, + values, ir->type->vector_elements, + &src_reg.swizzle); + src_reg.reladdr = NULL; + src_reg.negate = 0; + + this->result = src_reg; +} + +function_entry * +ir_to_mesa_visitor::get_function_signature(ir_function_signature *sig) +{ + function_entry *entry; + + foreach_iter(exec_list_iterator, iter, this->function_signatures) { + entry = (function_entry *)iter.get(); + + if (entry->sig == sig) + return entry; + } + + entry = talloc(mem_ctx, function_entry); + entry->sig = sig; + entry->sig_id = this->next_signature_id++; + entry->bgn_inst = NULL; + + /* Allocate storage for all the parameters. 
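The conditional-assignment path above leans on OPCODE_CMP, which computes (a < 0.0) ? b : c. Because comparison results are 0.0 or 1.0, negating the condition makes CMP pick the new value exactly when the condition is true, with no extra instruction to massage the condition. Below is a tiny numeric demo of that trick.

/* Illustrative sketch, not Mesa code: conditional move via CMP with a
 * negated 0.0/1.0 condition. */
#include <stdio.h>

static float cmp(float a, float b, float c)    /* OPCODE_CMP semantics */
{
   return (a < 0.0f) ? b : c;
}

int main(void)
{
   float old_value = 3.0f, rhs = 7.0f;
   float cond;

   for (cond = 0.0f; cond <= 1.0f; cond += 1.0f) {
      /* cond == 0.0 keeps the old value; cond == 1.0 writes the new one. */
      float written = cmp(-cond, rhs, old_value);
      printf("cond=%.0f -> destination holds %.1f\n", cond, written);
   }
   return 0;
}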
*/ + foreach_iter(exec_list_iterator, iter, sig->parameters) { + ir_variable *param = (ir_variable *)iter.get(); + variable_storage *storage; + + storage = find_variable_storage(param); + assert(!storage); + + storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, + this->next_temp); + this->variables.push_tail(storage); + + this->next_temp += type_size(param->type); + break; + } + + if (sig->return_type) { + entry->return_reg = get_temp(sig->return_type); + } else { + entry->return_reg = ir_to_mesa_undef; + } + + this->function_signatures.push_tail(entry); + return entry; +} + +void +ir_to_mesa_visitor::visit(ir_call *ir) +{ + ir_to_mesa_instruction *call_inst; + ir_function_signature *sig = ir->get_callee(); + function_entry *entry = get_function_signature(sig); + int i; + + /* Process in parameters. */ + exec_list_iterator sig_iter = sig->parameters.iterator(); + foreach_iter(exec_list_iterator, iter, *ir) { + ir_rvalue *param_rval = (ir_rvalue *)iter.get(); + ir_variable *param = (ir_variable *)sig_iter.get(); + + if (param->mode == ir_var_in || + param->mode == ir_var_inout) { + variable_storage *storage = find_variable_storage(param); + assert(storage); + + param_rval->accept(this); + ir_to_mesa_src_reg r = this->result; + + ir_to_mesa_dst_reg l; + l.file = storage->file; + l.index = storage->index; + l.reladdr = NULL; + l.writemask = WRITEMASK_XYZW; + l.cond_mask = COND_TR; + + for (i = 0; i < type_size(param->type); i++) { + ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + sig_iter.next(); + } + assert(!sig_iter.has_next()); + + /* Emit call instruction */ + call_inst = ir_to_mesa_emit_op1(ir, OPCODE_CAL, + ir_to_mesa_undef_dst, ir_to_mesa_undef); + call_inst->function = entry; + + /* Process out parameters. */ + sig_iter = sig->parameters.iterator(); + foreach_iter(exec_list_iterator, iter, *ir) { + ir_rvalue *param_rval = (ir_rvalue *)iter.get(); + ir_variable *param = (ir_variable *)sig_iter.get(); + + if (param->mode == ir_var_out || + param->mode == ir_var_inout) { + variable_storage *storage = find_variable_storage(param); + assert(storage); + + ir_to_mesa_src_reg r; + r.file = storage->file; + r.index = storage->index; + r.reladdr = NULL; + r.swizzle = SWIZZLE_NOOP; + r.negate = 0; + + param_rval->accept(this); + ir_to_mesa_dst_reg l = ir_to_mesa_dst_reg_from_src(this->result); + + for (i = 0; i < type_size(param->type); i++) { + ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + sig_iter.next(); + } + assert(!sig_iter.has_next()); + + /* Process return value. */ + this->result = entry->return_reg; +} + + +void +ir_to_mesa_visitor::visit(ir_texture *ir) +{ + ir_to_mesa_src_reg result_src, coord, lod_info = { 0 }, projector; + ir_to_mesa_dst_reg result_dst, coord_dst; + ir_to_mesa_instruction *inst = NULL; + prog_opcode opcode = OPCODE_NOP; + + ir->coordinate->accept(this); + + /* Put our coords in a temp. We'll need to modify them for shadow, + * projection, or LOD, so the only case we'd use it as is is if + * we're doing plain old texturing. Mesa IR optimization should + * handle cleaning up our mess in that case. + */ + coord = get_temp(glsl_type::vec4_type); + coord_dst = ir_to_mesa_dst_reg_from_src(coord); + ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, + this->result); + + if (ir->projector) { + ir->projector->accept(this); + projector = this->result; + } + + /* Storage for our result. Ideally for an assignment we'd be using + * the actual storage for the result here, instead. 
+ */ + result_src = get_temp(glsl_type::vec4_type); + result_dst = ir_to_mesa_dst_reg_from_src(result_src); + + switch (ir->op) { + case ir_tex: + opcode = OPCODE_TEX; + break; + case ir_txb: + opcode = OPCODE_TXB; + ir->lod_info.bias->accept(this); + lod_info = this->result; + break; + case ir_txl: + opcode = OPCODE_TXL; + ir->lod_info.lod->accept(this); + lod_info = this->result; + break; + case ir_txd: + case ir_txf: + assert(!"GLSL 1.30 features unsupported"); + break; + } + + if (ir->projector) { + if (opcode == OPCODE_TEX) { + /* Slot the projector in as the last component of the coord. */ + coord_dst.writemask = WRITEMASK_W; + ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, projector); + coord_dst.writemask = WRITEMASK_XYZW; + opcode = OPCODE_TXP; + } else { + ir_to_mesa_src_reg coord_w = coord; + coord_w.swizzle = SWIZZLE_WWWW; + + /* For the other TEX opcodes there's no projective version + * since the last slot is taken up by lod info. Do the + * projective divide now. + */ + coord_dst.writemask = WRITEMASK_W; + ir_to_mesa_emit_op1(ir, OPCODE_RCP, coord_dst, projector); + + coord_dst.writemask = WRITEMASK_XYZ; + ir_to_mesa_emit_op2(ir, OPCODE_MUL, coord_dst, coord, coord_w); + + coord_dst.writemask = WRITEMASK_XYZW; + coord.swizzle = SWIZZLE_XYZW; + } + } + + if (ir->shadow_comparitor) { + /* Slot the shadow value in as the second to last component of the + * coord. + */ + ir->shadow_comparitor->accept(this); + coord_dst.writemask = WRITEMASK_Z; + ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, this->result); + coord_dst.writemask = WRITEMASK_XYZW; + } + + if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) { + /* Mesa IR stores lod or lod bias in the last channel of the coords. */ + coord_dst.writemask = WRITEMASK_W; + ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, lod_info); + coord_dst.writemask = WRITEMASK_XYZW; + } + + inst = ir_to_mesa_emit_op1(ir, opcode, result_dst, coord); + + if (ir->shadow_comparitor) + inst->tex_shadow = GL_TRUE; + + ir_dereference_variable *sampler = ir->sampler->as_dereference_variable(); + assert(sampler); /* FINISHME: sampler arrays */ + /* generate the mapping, remove when we generate storage at + * declaration time + */ + sampler->accept(this); + + inst->sampler = get_sampler_number(sampler->var->location); + + switch (sampler->type->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: + inst->tex_target = TEXTURE_1D_INDEX; + break; + case GLSL_SAMPLER_DIM_2D: + inst->tex_target = TEXTURE_2D_INDEX; + break; + case GLSL_SAMPLER_DIM_3D: + inst->tex_target = TEXTURE_3D_INDEX; + break; + case GLSL_SAMPLER_DIM_CUBE: + inst->tex_target = TEXTURE_CUBE_INDEX; + break; + default: + assert(!"FINISHME: other texture targets"); + } + + this->result = result_src; +} + +void +ir_to_mesa_visitor::visit(ir_return *ir) +{ + assert(current_function); + + if (ir->get_value()) { + ir_to_mesa_dst_reg l; + int i; + + ir->get_value()->accept(this); + ir_to_mesa_src_reg r = this->result; + + l = ir_to_mesa_dst_reg_from_src(current_function->return_reg); + + for (i = 0; i < type_size(current_function->sig->return_type); i++) { + ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + ir_to_mesa_emit_op0(ir, OPCODE_RET); +} + +void +ir_to_mesa_visitor::visit(ir_discard *ir) +{ + assert(ir->condition == NULL); /* FINISHME */ + + ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV); +} + +void +ir_to_mesa_visitor::visit(ir_if *ir) +{ + ir_to_mesa_instruction *cond_inst, *if_inst, *else_inst = NULL; + ir_to_mesa_instruction *prev_inst; + + prev_inst = 
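When a projective texture lookup cannot use TXP (TXB and TXL already use the .w slot for bias/lod), the code above divides the coordinate through by the projector explicitly: RCP into .w, then MUL of .xyz by it. Below is a small numeric sketch of that divide; the vec4 struct here exists only for the example.

/* Illustrative sketch, not Mesa code: the manual projective divide emitted
 * when no TXP form of the texture opcode is available. */
#include <stdio.h>

struct vec4 { float x, y, z, w; };

static struct vec4 project(struct vec4 coord, float projector)
{
   float inv = 1.0f / projector;   /* OPCODE_RCP into coord.w */
   coord.w = inv;
   coord.x *= inv;                 /* OPCODE_MUL coord.xyz, coord, coord.wwww */
   coord.y *= inv;
   coord.z *= inv;
   return coord;
}

int main(void)
{
   struct vec4 c = { 2.0f, 4.0f, 6.0f, 0.0f };
   struct vec4 p = project(c, 2.0f);

   printf("projected coord = (%.1f, %.1f, %.1f), 1/q = %.2f\n",
          p.x, p.y, p.z, p.w);
   return 0;
}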
(ir_to_mesa_instruction *)this->instructions.get_tail(); + + ir->condition->accept(this); + assert(this->result.file != PROGRAM_UNDEFINED); + + if (ctx->Shader.EmitCondCodes) { + cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail(); + + /* See if we actually generated any instruction for generating + * the condition. If not, then cook up a move to a temp so we + * have something to set cond_update on. + */ + if (cond_inst == prev_inst) { + ir_to_mesa_src_reg temp = get_temp(glsl_type::bool_type); + cond_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_MOV, + ir_to_mesa_dst_reg_from_src(temp), + result); + } + cond_inst->cond_update = GL_TRUE; + + if_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_IF); + if_inst->dst_reg.cond_mask = COND_NE; + } else { + if_inst = ir_to_mesa_emit_op1(ir->condition, + OPCODE_IF, ir_to_mesa_undef_dst, + this->result); + } + + this->instructions.push_tail(if_inst); + + visit_exec_list(&ir->then_instructions, this); + + if (!ir->else_instructions.is_empty()) { + else_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_ELSE); + visit_exec_list(&ir->else_instructions, this); + } + + if_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_ENDIF, + ir_to_mesa_undef_dst, ir_to_mesa_undef); +} + +ir_to_mesa_visitor::ir_to_mesa_visitor() +{ + result.file = PROGRAM_UNDEFINED; + next_temp = 1; + next_signature_id = 1; + sampler_map = NULL; + sampler_map_size = 0; + current_function = NULL; +} + +static struct prog_src_register +mesa_src_reg_from_ir_src_reg(ir_to_mesa_src_reg reg) +{ + struct prog_src_register mesa_reg; + + mesa_reg.File = reg.file; + assert(reg.index < (1 << INST_INDEX_BITS) - 1); + mesa_reg.Index = reg.index; + mesa_reg.Swizzle = reg.swizzle; + mesa_reg.RelAddr = reg.reladdr != NULL; + mesa_reg.Negate = reg.negate; + mesa_reg.Abs = 0; + + return mesa_reg; +} + +static void +set_branchtargets(ir_to_mesa_visitor *v, + struct prog_instruction *mesa_instructions, + int num_instructions) +{ + int if_count = 0, loop_count = 0; + int *if_stack, *loop_stack; + int if_stack_pos = 0, loop_stack_pos = 0; + int i, j; + + for (i = 0; i < num_instructions; i++) { + switch (mesa_instructions[i].Opcode) { + case OPCODE_IF: + if_count++; + break; + case OPCODE_BGNLOOP: + loop_count++; + break; + case OPCODE_BRK: + case OPCODE_CONT: + mesa_instructions[i].BranchTarget = -1; + break; + default: + break; + } + } + + if_stack = (int *)calloc(if_count, sizeof(*if_stack)); + loop_stack = (int *)calloc(loop_count, sizeof(*loop_stack)); + + for (i = 0; i < num_instructions; i++) { + switch (mesa_instructions[i].Opcode) { + case OPCODE_IF: + if_stack[if_stack_pos] = i; + if_stack_pos++; + break; + case OPCODE_ELSE: + mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; + if_stack[if_stack_pos - 1] = i; + break; + case OPCODE_ENDIF: + mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; + if_stack_pos--; + break; + case OPCODE_BGNLOOP: + loop_stack[loop_stack_pos] = i; + loop_stack_pos++; + break; + case OPCODE_ENDLOOP: + loop_stack_pos--; + /* Rewrite any breaks/conts at this nesting level (haven't + * already had a BranchTarget assigned) to point to the end + * of the loop. + */ + for (j = loop_stack[loop_stack_pos]; j < i; j++) { + if (mesa_instructions[j].Opcode == OPCODE_BRK || + mesa_instructions[j].Opcode == OPCODE_CONT) { + if (mesa_instructions[j].BranchTarget == -1) { + mesa_instructions[j].BranchTarget = i; + } + } + } + /* The loop ends point at each other. 
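+	  * (ENDLOOP's BranchTarget is set to its BGNLOOP and BGNLOOP's to its ENDLOOP.)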
*/ + mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos]; + mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i; + break; + case OPCODE_CAL: + foreach_iter(exec_list_iterator, iter, v->function_signatures) { + function_entry *entry = (function_entry *)iter.get(); + + if (entry->sig_id == mesa_instructions[i].BranchTarget) { + mesa_instructions[i].BranchTarget = entry->inst; + break; + } + } + break; + default: + break; + } + } + + free(if_stack); +} + +static void +print_program(struct prog_instruction *mesa_instructions, + ir_instruction **mesa_instruction_annotation, + int num_instructions) +{ + ir_instruction *last_ir = NULL; + int i; + int indent = 0; + + for (i = 0; i < num_instructions; i++) { + struct prog_instruction *mesa_inst = mesa_instructions + i; + ir_instruction *ir = mesa_instruction_annotation[i]; + + fprintf(stdout, "%3d: ", i); + + if (last_ir != ir && ir) { + int j; + + for (j = 0; j < indent; j++) { + fprintf(stdout, " "); + } + ir->print(); + printf("\n"); + last_ir = ir; + + fprintf(stdout, " "); /* line number spacing. */ + } + + indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent, + PROG_PRINT_DEBUG, NULL); + } +} + +static void +mark_input(struct gl_program *prog, + int index, + GLboolean reladdr) +{ + prog->InputsRead |= BITFIELD64_BIT(index); + int i; + + if (reladdr) { + if (index >= FRAG_ATTRIB_TEX0 && index <= FRAG_ATTRIB_TEX7) { + for (i = 0; i < 8; i++) { + prog->InputsRead |= BITFIELD64_BIT(FRAG_ATTRIB_TEX0 + i); + } + } else { + assert(!"FINISHME: Mark InputsRead for varying arrays"); + } + } +} + +static void +mark_output(struct gl_program *prog, + int index, + GLboolean reladdr) +{ + prog->OutputsWritten |= BITFIELD64_BIT(index); + int i; + + if (reladdr) { + if (index >= VERT_RESULT_TEX0 && index <= VERT_RESULT_TEX7) { + for (i = 0; i < 8; i++) { + prog->OutputsWritten |= BITFIELD64_BIT(FRAG_ATTRIB_TEX0 + i); + } + } else { + assert(!"FINISHME: Mark OutputsWritten for varying arrays"); + } + } +} + +static void +count_resources(struct gl_program *prog) +{ + unsigned int i; + + prog->InputsRead = 0; + prog->OutputsWritten = 0; + prog->SamplersUsed = 0; + + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *inst = &prog->Instructions[i]; + unsigned int reg; + + switch (inst->DstReg.File) { + case PROGRAM_OUTPUT: + mark_output(prog, inst->DstReg.Index, inst->DstReg.RelAddr); + break; + case PROGRAM_INPUT: + mark_input(prog, inst->DstReg.Index, inst->DstReg.RelAddr); + break; + default: + break; + } + + for (reg = 0; reg < _mesa_num_inst_src_regs(inst->Opcode); reg++) { + switch (inst->SrcReg[reg].File) { + case PROGRAM_OUTPUT: + mark_output(prog, inst->SrcReg[reg].Index, + inst->SrcReg[reg].RelAddr); + break; + case PROGRAM_INPUT: + mark_input(prog, inst->SrcReg[reg].Index, inst->SrcReg[reg].RelAddr); + break; + default: + break; + } + } + + /* Instead of just using the uniform's value to map to a + * sampler, Mesa first allocates a separate number for the + * sampler (_mesa_add_sampler), then we reindex it down to a + * small integer (sampler_map[], SamplersUsed), then that gets + * mapped to the uniform's value, and we get an actual sampler. 
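+       * Here we only record which sampler units and targets each texture instruction uses.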
+ */ + if (_mesa_is_tex_instruction(inst->Opcode)) { + prog->SamplerTargets[inst->TexSrcUnit] = + (gl_texture_index)inst->TexSrcTarget; + prog->SamplersUsed |= 1 << inst->TexSrcUnit; + if (inst->TexShadow) { + prog->ShadowSamplers |= 1 << inst->TexSrcUnit; + } + } + } + + _mesa_update_shader_textures_used(prog); +} + +/* Each stage has some uniforms in its Parameters list. The Uniforms + * list for the linked shader program has a pointer to these uniforms + * in each of the stage's Parameters list, so that their values can be + * updated when a uniform is set. + */ +static void +link_uniforms_to_shared_uniform_list(struct gl_uniform_list *uniforms, + struct gl_program *prog) +{ + unsigned int i; + + for (i = 0; i < prog->Parameters->NumParameters; i++) { + const struct gl_program_parameter *p = prog->Parameters->Parameters + i; + + if (p->Type == PROGRAM_UNIFORM || p->Type == PROGRAM_SAMPLER) { + struct gl_uniform *uniform = + _mesa_append_uniform(uniforms, p->Name, prog->Target, i); + if (uniform) + uniform->Initialized = p->Initialized; + } + } +} + +struct gl_program * +get_mesa_program(GLcontext *ctx, struct gl_shader_program *shader_program, + struct gl_shader *shader) +{ + void *mem_ctx = shader_program; + ir_to_mesa_visitor v; + struct prog_instruction *mesa_instructions, *mesa_inst; + ir_instruction **mesa_instruction_annotation; + int i; + struct gl_program *prog; + GLenum target; + const char *target_string; + GLboolean progress; + + switch (shader->Type) { + case GL_VERTEX_SHADER: + target = GL_VERTEX_PROGRAM_ARB; + target_string = "vertex"; + break; + case GL_FRAGMENT_SHADER: + target = GL_FRAGMENT_PROGRAM_ARB; + target_string = "fragment"; + break; + default: + assert(!"should not be reached"); + break; + } + + validate_ir_tree(shader->ir); + + prog = ctx->Driver.NewProgram(ctx, target, 1); + if (!prog) + return NULL; + prog->Parameters = _mesa_new_parameter_list(); + prog->Varying = _mesa_new_parameter_list(); + prog->Attributes = _mesa_new_parameter_list(); + v.ctx = ctx; + v.prog = prog; + + v.mem_ctx = talloc_new(NULL); + + /* Emit Mesa IR for main(). */ + visit_exec_list(shader->ir, &v); + v.ir_to_mesa_emit_op0(NULL, OPCODE_END); + + /* Now emit bodies for any functions that were used. 
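+    * Emitting a body can record calls to further functions, so iterate until no new signatures need bodies.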
*/ + do { + progress = GL_FALSE; + + foreach_iter(exec_list_iterator, iter, v.function_signatures) { + function_entry *entry = (function_entry *)iter.get(); + + if (!entry->bgn_inst) { + v.current_function = entry; + + entry->bgn_inst = v.ir_to_mesa_emit_op0(NULL, OPCODE_BGNSUB); + entry->bgn_inst->function = entry; + + visit_exec_list(&entry->sig->body, &v); + + entry->bgn_inst = v.ir_to_mesa_emit_op0(NULL, OPCODE_RET); + entry->bgn_inst = v.ir_to_mesa_emit_op0(NULL, OPCODE_ENDSUB); + progress = GL_TRUE; + } + } + } while (progress); + + prog->NumTemporaries = v.next_temp; + + int num_instructions = 0; + foreach_iter(exec_list_iterator, iter, v.instructions) { + num_instructions++; + } + + mesa_instructions = + (struct prog_instruction *)calloc(num_instructions, + sizeof(*mesa_instructions)); + mesa_instruction_annotation = talloc_array(mem_ctx, ir_instruction *, + num_instructions); + + mesa_inst = mesa_instructions; + i = 0; + foreach_iter(exec_list_iterator, iter, v.instructions) { + ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get(); + + mesa_inst->Opcode = inst->op; + mesa_inst->CondUpdate = inst->cond_update; + mesa_inst->DstReg.File = inst->dst_reg.file; + mesa_inst->DstReg.Index = inst->dst_reg.index; + mesa_inst->DstReg.CondMask = inst->dst_reg.cond_mask; + mesa_inst->DstReg.WriteMask = inst->dst_reg.writemask; + mesa_inst->DstReg.RelAddr = inst->dst_reg.reladdr != NULL; + mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src_reg[0]); + mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src_reg[1]); + mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src_reg[2]); + mesa_inst->TexSrcUnit = inst->sampler; + mesa_inst->TexSrcTarget = inst->tex_target; + mesa_inst->TexShadow = inst->tex_shadow; + mesa_instruction_annotation[i] = inst->ir; + + if (ctx->Shader.EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) { + shader_program->InfoLog = + talloc_asprintf_append(shader_program->InfoLog, + "Couldn't flatten if statement\n"); + shader_program->LinkStatus = false; + } + + if (mesa_inst->Opcode == OPCODE_BGNSUB) + inst->function->inst = i; + else if (mesa_inst->Opcode == OPCODE_CAL) + mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */ + else if (mesa_inst->Opcode == OPCODE_ARL) + prog->NumAddressRegs = 1; + + mesa_inst++; + i++; + } + + set_branchtargets(&v, mesa_instructions, num_instructions); + if (ctx->Shader.Flags & GLSL_DUMP) { + printf("Mesa %s program:\n", target_string); + print_program(mesa_instructions, mesa_instruction_annotation, + num_instructions); + } + + prog->Instructions = mesa_instructions; + prog->NumInstructions = num_instructions; + + _mesa_reference_program(ctx, &shader->Program, prog); + + if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) { + _mesa_optimize_program(ctx, prog); + } + + return prog; +} + +extern "C" { + +void +_mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader) +{ + struct _mesa_glsl_parse_state *state = + new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader); + + const char *source = shader->Source; + state->error = preprocess(state, &source, &state->info_log, + &ctx->Extensions); + + if (!state->error) { + _mesa_glsl_lexer_ctor(state, source); + _mesa_glsl_parse(state); + _mesa_glsl_lexer_dtor(state); + } + + shader->ir = new(shader) exec_list; + if (!state->error && !state->translation_unit.is_empty()) + _mesa_ast_to_hir(shader->ir, state); + + if (!state->error && !shader->ir->is_empty()) { + validate_ir_tree(shader->ir); + + /* Lowering */ + do_mat_op_to_vec(shader->ir); 
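+      /* mod and division are lowered to fract and multiply-by-reciprocal sequences. */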
+ do_mod_to_fract(shader->ir); + do_div_to_mul_rcp(shader->ir); + + /* Optimization passes */ + bool progress; + do { + progress = false; + + progress = do_function_inlining(shader->ir) || progress; + progress = do_if_simplification(shader->ir) || progress; + progress = do_copy_propagation(shader->ir) || progress; + progress = do_dead_code_local(shader->ir) || progress; + progress = do_dead_code_unlinked(state, shader->ir) || progress; + progress = do_constant_variable_unlinked(shader->ir) || progress; + progress = do_constant_folding(shader->ir) || progress; + progress = do_if_return(shader->ir) || progress; + if (ctx->Shader.EmitNoIfs) + progress = do_if_to_cond_assign(shader->ir) || progress; + + progress = do_vec_index_to_swizzle(shader->ir) || progress; + /* Do this one after the previous to let the easier pass handle + * constant vector indexing. + */ + progress = do_vec_index_to_cond_assign(shader->ir) || progress; + + progress = do_swizzle_swizzle(shader->ir) || progress; + } while (progress); + + validate_ir_tree(shader->ir); + } + + shader->symbols = state->symbols; + + shader->CompileStatus = !state->error; + shader->InfoLog = state->info_log; + shader->Version = state->language_version; + memcpy(shader->builtins_to_link, state->builtins_to_link, + sizeof(shader->builtins_to_link[0]) * state->num_builtins_to_link); + shader->num_builtins_to_link = state->num_builtins_to_link; + + /* Retain any live IR, but trash the rest. */ + reparent_ir(shader->ir, shader); + + talloc_free(state); + } + +void +_mesa_glsl_link_shader(GLcontext *ctx, struct gl_shader_program *prog) +{ + unsigned int i; + + _mesa_clear_shader_program_data(ctx, prog); + + prog->LinkStatus = GL_TRUE; + + for (i = 0; i < prog->NumShaders; i++) { + if (!prog->Shaders[i]->CompileStatus) { + prog->InfoLog = + talloc_asprintf_append(prog->InfoLog, + "linking with uncompiled shader"); + prog->LinkStatus = GL_FALSE; + } + } + + prog->Varying = _mesa_new_parameter_list(); + _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL); + _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL); + + if (prog->LinkStatus) { + link_shaders(prog); + + /* We don't use the linker's uniforms list, and cook up our own at + * generate time. 
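+       * (It is rebuilt below by link_uniforms_to_shared_uniform_list() for each linked stage.)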
+ */ + free(prog->Uniforms); + prog->Uniforms = _mesa_new_uniform_list(); + } + + if (prog->LinkStatus) { + for (i = 0; i < prog->_NumLinkedShaders; i++) { + struct gl_program *linked_prog; + + linked_prog = get_mesa_program(ctx, prog, + prog->_LinkedShaders[i]); + count_resources(linked_prog); + + link_uniforms_to_shared_uniform_list(prog->Uniforms, linked_prog); + + switch (prog->_LinkedShaders[i]->Type) { + case GL_VERTEX_SHADER: + _mesa_reference_vertprog(ctx, &prog->VertexProgram, + (struct gl_vertex_program *)linked_prog); + ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, + linked_prog); + break; + case GL_FRAGMENT_SHADER: + _mesa_reference_fragprog(ctx, &prog->FragmentProgram, + (struct gl_fragment_program *)linked_prog); + ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, + linked_prog); + break; + } + } + } +} + +} /* extern "C" */ diff --cc src/mesa/program/ir_to_mesa.h index e832f84,0000000..e832f84 mode 100644,000000..100644 --- a/src/mesa/program/ir_to_mesa.h +++ b/src/mesa/program/ir_to_mesa.h diff --cc src/mesa/sources.mak index 117b3f3,f01b60c..373f1b5 --- a/src/mesa/sources.mak +++ b/src/mesa/sources.mak @@@ -223,56 -228,48 +228,51 @@@ STATETRACKER_SOURCES = state_tracker/st_program.c \ state_tracker/st_texture.c - SHADER_SOURCES = \ - shader/arbprogparse.c \ - shader/arbprogram.c \ - shader/atifragshader.c \ - shader/hash_table.c \ - shader/lex.yy.c \ - shader/nvfragparse.c \ - shader/nvprogram.c \ - shader/nvvertparse.c \ - shader/program.c \ - shader/program_parse.tab.c \ - shader/program_parse_extra.c \ - shader/prog_cache.c \ - shader/prog_execute.c \ - shader/prog_instruction.c \ - shader/prog_noise.c \ - shader/prog_optimize.c \ - shader/prog_parameter.c \ - shader/prog_parameter_layout.c \ - shader/prog_print.c \ - shader/prog_statevars.c \ - shader/prog_uniform.c \ - shader/programopt.c \ - shader/symbol_table.c \ - shader/shader_api.c \ - shader/uniforms.c + PROGRAM_SOURCES = \ + program/arbprogparse.c \ + program/hash_table.c \ + program/lex.yy.c \ + program/nvfragparse.c \ + program/nvvertparse.c \ + program/program.c \ + program/program_parse.tab.c \ + program/program_parse_extra.c \ + program/prog_cache.c \ + program/prog_execute.c \ + program/prog_instruction.c \ + program/prog_noise.c \ + program/prog_optimize.c \ + program/prog_parameter.c \ + program/prog_parameter_layout.c \ + program/prog_print.c \ + program/prog_statevars.c \ + program/prog_uniform.c \ + program/programopt.c \ + program/symbol_table.c +SHADER_CXX_SOURCES = \ - shader/ir_to_mesa.cpp ++ program/ir_to_mesa.cpp + SLANG_SOURCES = \ - shader/slang/slang_builtin.c \ - shader/slang/slang_codegen.c \ - shader/slang/slang_compile.c \ - shader/slang/slang_compile_function.c \ - shader/slang/slang_compile_operation.c \ - shader/slang/slang_compile_struct.c \ - shader/slang/slang_compile_variable.c \ - shader/slang/slang_emit.c \ - shader/slang/slang_ir.c \ - shader/slang/slang_label.c \ - shader/slang/slang_link.c \ - shader/slang/slang_log.c \ - shader/slang/slang_mem.c \ - shader/slang/slang_print.c \ - shader/slang/slang_simplify.c \ - shader/slang/slang_storage.c \ - shader/slang/slang_typeinfo.c \ - shader/slang/slang_vartable.c \ - shader/slang/slang_utility.c + slang/slang_builtin.c \ + slang/slang_codegen.c \ + slang/slang_compile.c \ + slang/slang_compile_function.c \ + slang/slang_compile_operation.c \ + slang/slang_compile_struct.c \ + slang/slang_compile_variable.c \ + slang/slang_emit.c \ + slang/slang_ir.c \ + slang/slang_label.c \ + slang/slang_link.c \ + 
slang/slang_log.c \ + slang/slang_mem.c \ + slang/slang_print.c \ + slang/slang_simplify.c \ + slang/slang_storage.c \ + slang/slang_typeinfo.c \ + slang/slang_vartable.c \ + slang/slang_utility.c ASM_C_SOURCES = \ x86/common_x86.c \ @@@ -338,12 -333,10 +338,12 @@@ MESA_GALLIUM_SOURCES = $(MATH_SOURCES) \ $(VBO_SOURCES) \ $(STATETRACKER_SOURCES) \ - $(SHADER_SOURCES) \ + $(PROGRAM_SOURCES) \ ppc/common_ppc.c \ - x86/common_x86.c \ - $(SLANG_SOURCES) + x86/common_x86.c + +MESA_GALLIUM_CXX_SOURCES = \ + $(SHADER_CXX_SOURCES) # All the core C sources, for dependency checking ALL_SOURCES = \