i915g: introduce the tiny shader optimizer.
author Stéphane Marchesin <marcheu@chromium.org>
Tue, 5 Jul 2011 03:21:37 +0000 (20:21 -0700)
committer Stéphane Marchesin <marcheu@chromium.org>
Wed, 6 Jul 2011 09:49:48 +0000 (02:49 -0700)
src/gallium/drivers/i915/Makefile
src/gallium/drivers/i915/SConscript
src/gallium/drivers/i915/i915_fpc.h
src/gallium/drivers/i915/i915_fpc_emit.c
src/gallium/drivers/i915/i915_fpc_optimize.c [new file with mode: 0644]
src/gallium/drivers/i915/i915_fpc_translate.c

index 7781247..36197fb 100644 (file)
@@ -27,6 +27,7 @@ C_SOURCES = \
        i915_resource_buffer.c \
        i915_fpc_emit.c \
        i915_fpc_translate.c \
+       i915_fpc_optimize.c \
        i915_surface.c 
 
 include ../../Makefile.template
index 9837060..76f5970 100644 (file)
@@ -14,6 +14,7 @@ i915 = env.ConvenienceLibrary(
                'i915_flush.c',
                'i915_fpc_emit.c',
                'i915_fpc_translate.c',
+               'i915_fpc_optimize.c',
                'i915_prim_emit.c',
                'i915_prim_vbuf.c',
                'i915_query.c',
index 509395c..b760bc4 100644 (file)
@@ -33,7 +33,9 @@
 #include "i915_context.h"
 #include "i915_reg.h"
 
+#include "pipe/p_shader_tokens.h"
 
+#include "tgsi/tgsi_parse.h"
 
 #define I915_PROGRAM_SIZE 192
 
@@ -207,4 +209,90 @@ extern void
 i915_program_error(struct i915_fp_compile *p, const char *msg, ...);
 
 
+/*======================================================================
+ * i915_fpc_optimize.c
+ */
+
+
+/*
+ * Source operand as stored in the i915 token list.
+ *
+ * The fields carry the same names as the tgsi_src_register fields they are
+ * copied from (see copy_src_reg() in i915_fpc_optimize.c).  The swizzle
+ * fields are 3 bits wide, which leaves room for the i915-only
+ * TGSI_SWIZZLE_ZERO (4) and TGSI_SWIZZLE_ONE (5) selectors defined below.
+ */
+struct i915_src_register
+{
+   unsigned File        : 4;  /* TGSI_FILE_ */
+   unsigned Indirect    : 1;  /* BOOL */
+   unsigned Dimension   : 1;  /* BOOL */
+   int      Index       : 16; /* SINT */
+   unsigned SwizzleX    : 3;  /* TGSI_SWIZZLE_, or TGSI_SWIZZLE_ZERO/ONE */
+   unsigned SwizzleY    : 3;  /* TGSI_SWIZZLE_, or TGSI_SWIZZLE_ZERO/ONE */
+   unsigned SwizzleZ    : 3;  /* TGSI_SWIZZLE_, or TGSI_SWIZZLE_ZERO/ONE */
+   unsigned SwizzleW    : 3;  /* TGSI_SWIZZLE_, or TGSI_SWIZZLE_ZERO/ONE */
+   unsigned Absolute    : 1;    /* BOOL */
+   unsigned Negate      : 1;    /* BOOL */
+};
+
+/* Additional swizzle supported in i915 */
+#define TGSI_SWIZZLE_ZERO 4
+#define TGSI_SWIZZLE_ONE 5
+
+/*
+ * Destination operand as stored in the i915 token list.
+ *
+ * File, WriteMask, Indirect, Dimension and Index are filled from the
+ * tgsi_dst_register fields of the same name (see copy_dst_reg() in
+ * i915_fpc_optimize.c).  The declared bitfields add up to 32 bits.
+ */
+struct i915_dst_register
+{
+   unsigned File        : 4;  /* TGSI_FILE_ */
+   unsigned WriteMask   : 4;  /* TGSI_WRITEMASK_ */
+   unsigned Indirect    : 1;  /* BOOL */
+   unsigned Dimension   : 1;  /* BOOL */
+   int      Index       : 16; /* SINT */
+   unsigned Padding     : 6;  /* unused; not written by copy_dst_reg() */
+};
+
+
+/*
+ * Full destination operand of an i915 instruction.
+ *
+ * Only the Register member is present.  The indirect / dimension members
+ * listed in the comment below are not part of this struct, so that
+ * information is not carried over into the i915 token list.
+ */
+struct i915_full_dst_register
+{
+   struct i915_dst_register               Register;
+/*
+   struct tgsi_src_register               Indirect;
+   struct tgsi_dimension                  Dimension;
+   struct tgsi_src_register               DimIndirect;
+*/
+};
+
+/*
+ * Full source operand of an i915 instruction.
+ *
+ * Only the Register member is present.  The indirect / dimension members
+ * listed in the comment below are not part of this struct, so that
+ * information is not carried over into the i915 token list.
+ */
+struct i915_full_src_register
+{
+   struct i915_src_register         Register;
+/*
+   struct tgsi_src_register         Indirect;
+   struct tgsi_dimension            Dimension;
+   struct tgsi_src_register         DimIndirect;
+*/
+};
+
+/*
+ * One instruction in the i915 token list.
+ *
+ * Instruction and Texture are the unmodified TGSI structs; the operands use
+ * the i915 register structs.  There is room for exactly one destination and
+ * three sources.  Predicate and Label (commented out below) are not stored.
+ */
+struct i915_full_instruction
+{
+   struct tgsi_instruction             Instruction;
+/*
+   struct tgsi_instruction_predicate   Predicate;
+   struct tgsi_instruction_label       Label;
+*/
+   struct tgsi_instruction_texture     Texture;
+   struct i915_full_dst_register       Dst[1];
+   struct i915_full_src_register       Src[3];
+};
+
+
+/*
+ * One entry of the i915 token list.
+ *
+ * Token.Type selects the active member.  Declarations, immediates and
+ * properties keep their TGSI representation; only instructions use the
+ * i915-specific struct.
+ */
+union i915_full_token
+{
+   struct tgsi_token             Token;
+   struct tgsi_full_declaration  FullDeclaration;
+   struct tgsi_full_immediate    FullImmediate;
+   struct i915_full_instruction  FullInstruction;
+   struct tgsi_full_property     FullProperty;
+};
+
+/*
+ * Array of i915 tokens as returned by i915_optimize().
+ */
+struct i915_token_list
+{
+   union i915_full_token*     Tokens;     /* array of NumTokens entries */
+   unsigned                   NumTokens;  /* number of entries in Tokens */
+};
+
+/*
+ * Parse a TGSI token stream into a newly allocated i915 token list, applying
+ * the peephole optimizations of i915_fpc_optimize.c on the way.  The result
+ * must be released with i915_optimize_free().
+ */
+extern struct i915_token_list* i915_optimize(const struct tgsi_token *tokens);
+
+/*
+ * Release a token list returned by i915_optimize(), including its token
+ * array.
+ */
+extern void i915_optimize_free(struct i915_token_list* tokens);
+
 #endif
index d28595e..c4a42df 100644 (file)
@@ -369,7 +369,6 @@ i915_emit_const4f(struct i915_fp_compile * p,
 
    // XXX emit swizzle here for 0, 1, -1 and any combination thereof
    // we can use swizzle + neg for that
-   printf("const %f %f %f %f\n",c0,c1,c2,c3);
    for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
       if (ifs->constant_flags[reg] == 0xf &&
           ifs->constants[reg][0] == c0 &&
diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c
new file mode 100644 (file)
index 0000000..5c60d95
--- /dev/null
@@ -0,0 +1,182 @@
+/**************************************************************************
+ * 
+ * Copyright 2011 The Chromium OS authors.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_fpc.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+
+/*
+ * Tell whether two destination operands name the same register.
+ * File, Index and the Indirect / Dimension flags are compared; the write
+ * masks are deliberately left out of the comparison.
+ */
+static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2)
+{
+   const struct i915_dst_register *lhs = &d1->Register;
+   const struct i915_dst_register *rhs = &d2->Register;
+   const int same_location = lhs->File == rhs->File &&
+                             lhs->Index == rhs->Index;
+   const int same_addressing = lhs->Indirect == rhs->Indirect &&
+                               lhs->Dimension == rhs->Dimension;
+
+   return same_location && same_addressing;
+}
+
+/*
+ * Tell whether two source operands name the same register with the same
+ * Absolute / Negate modifiers.  Swizzles are not part of the comparison.
+ */
+static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2)
+{
+   const struct i915_src_register *lhs = &d1->Register;
+   const struct i915_src_register *rhs = &d2->Register;
+   const int same_location = lhs->File == rhs->File &&
+                             lhs->Index == rhs->Index;
+   const int same_addressing = lhs->Indirect == rhs->Indirect &&
+                               lhs->Dimension == rhs->Dimension;
+   const int same_modifiers = lhs->Absolute == rhs->Absolute &&
+                              lhs->Negate == rhs->Negate;
+
+   return same_location && same_addressing && same_modifiers;
+}
+
+
+/*
+ * Optimize away things like:
+ *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
+ *    MOV OUT[0].w, TEMP[2]
+ * into:
+ *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
+ * This is useful for optimizing texenv.
+ *
+ * current is the candidate MUL and next the candidate MOV; both are edited
+ * in place.  On a match the MOV becomes a NOP, the MUL write mask is widened
+ * to xyzw and the W swizzle of the MUL operand that is NOT the MOV source is
+ * set to TGSI_SWIZZLE_ONE.
+ *
+ * The merge is only done when it cannot change the value written to .w:
+ *  - both instructions write the same destination register,
+ *  - both use the same saturate mode,
+ *  - the MOV source and the matching MUL operand select the same component
+ *    for W (same_src_reg() does not look at swizzles),
+ *  - the other MUL operand is not negated, since its W becomes the
+ *    constant 1.
+ */
+static void i915_fpc_optimize_mov_after_mul(union i915_full_token* current, union i915_full_token* next)
+{
+   struct i915_full_instruction *mul, *mov;
+   int k;
+
+   if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION ||
+       next->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
+      return;
+
+   mul = &current->FullInstruction;
+   mov = &next->FullInstruction;
+
+   if (mul->Instruction.Opcode != TGSI_OPCODE_MUL ||
+       mov->Instruction.Opcode != TGSI_OPCODE_MOV ||
+       mul->Instruction.Saturate != mov->Instruction.Saturate ||
+       mul->Dst[0].Register.WriteMask != TGSI_WRITEMASK_XYZ ||
+       mov->Dst[0].Register.WriteMask != TGSI_WRITEMASK_W ||
+       !same_dst_reg(&mul->Dst[0], &mov->Dst[0]))
+      return;
+
+   /* MUL commutes: the MOV source may be either operand.  Src[1] is tried
+    * first, then Src[0].
+    */
+   for (k = 1; k >= 0; k--) {
+      struct i915_src_register *shared = &mul->Src[k].Register;
+      struct i915_src_register *other = &mul->Src[1 - k].Register;
+
+      if (!same_src_reg(&mov->Src[0], &mul->Src[k]))
+         continue;
+
+      /* The MOV result is Src[0].<SwizzleW>; the merged MUL computes
+       * 1 * shared.<SwizzleW>, so both must pick the same component.
+       */
+      if (mov->Src[0].Register.SwizzleW != shared->SwizzleW)
+         continue;
+
+      /* NOTE(review): assumes the negate modifier also applies to the
+       * ZERO/ONE swizzles, which would give -1 instead of 1 here.
+       */
+      if (other->Negate)
+         continue;
+
+      mov->Instruction.Opcode = TGSI_OPCODE_NOP;
+      mul->Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+      other->SwizzleW = TGSI_SWIZZLE_ONE;
+      return;
+   }
+}
+
+/*
+ * Fill the i915 source register o from the TGSI source register i, field by
+ * field.
+ */
+static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
+{
+   /* which register */
+   o->File = i->File;
+   o->Index = i->Index;
+   o->Indirect = i->Indirect;
+   o->Dimension = i->Dimension;
+
+   /* component selection */
+   o->SwizzleX = i->SwizzleX;
+   o->SwizzleY = i->SwizzleY;
+   o->SwizzleZ = i->SwizzleZ;
+   o->SwizzleW = i->SwizzleW;
+
+   /* modifiers */
+   o->Negate = i->Negate;
+   o->Absolute = i->Absolute;
+}
+
+/*
+ * Fill the i915 destination register o from the TGSI destination register i,
+ * field by field.  o->Padding is left untouched.
+ */
+static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
+{
+   /* which register */
+   o->File = i->File;
+   o->Index = i->Index;
+   o->Indirect = i->Indirect;
+   o->Dimension = i->Dimension;
+
+   /* which components get written */
+   o->WriteMask = i->WriteMask;
+}
+
+/*
+ * Convert a parsed TGSI instruction into its i915 form.
+ * The Instruction and Texture headers are taken over verbatim; Dst[0] and
+ * all three source slots are converted regardless of how many operands the
+ * opcode actually uses.
+ */
+static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
+{
+   unsigned src;
+
+   o->Instruction = i->Instruction;
+   o->Texture = i->Texture;
+
+   copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
+
+   for (src = 0; src < sizeof(o->Src) / sizeof(o->Src[0]); src++)
+      copy_src_reg(&o->Src[src].Register, &i->Src[src].Register);
+}
+
+/*
+ * Copy one parsed TGSI token into the i915 token list.
+ * Instructions are converted member by member; every other token type is
+ * copied as raw bytes, sized by the i915 union.
+ */
+static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
+{
+   if (i->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
+      copy_instruction(&o->FullInstruction, &i->FullInstruction);
+      return;
+   }
+
+   memcpy(o, i, sizeof(*o));
+}
+
+/**
+ * Parse a TGSI token stream into an i915 token list and run the peephole
+ * optimizations on it.
+ *
+ * The stream is parsed twice: once to count the tokens, once to convert
+ * them.  After each token is converted, it and its predecessor are handed to
+ * i915_fpc_optimize_mov_after_mul().
+ *
+ * \param tokens  the TGSI token array
+ * \return a newly allocated list, or NULL if memory could not be allocated.
+ */
+struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
+{
+   struct i915_token_list *out_tokens;
+   struct tgsi_parse_context parse;
+   unsigned i = 0;
+
+   out_tokens = MALLOC(sizeof(*out_tokens));
+   if (!out_tokens)
+      return NULL;
+   out_tokens->NumTokens = 0;
+
+   /* Count the tokens */
+   tgsi_parse_init( &parse, tokens );
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+      out_tokens->NumTokens++;
+   }
+   tgsi_parse_free (&parse);
+
+   /* Allocate our tokens */
+   out_tokens->Tokens = MALLOC(sizeof(*out_tokens->Tokens) * out_tokens->NumTokens);
+   if (!out_tokens->Tokens && out_tokens->NumTokens > 0) {
+      /* A NULL array is only an error when there is something to store. */
+      FREE(out_tokens);
+      return NULL;
+   }
+
+   /* Convert the tokens; never write past what was counted above. */
+   tgsi_parse_init( &parse, tokens );
+   while( i < out_tokens->NumTokens && !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+      copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
+
+      if (i > 0)
+         i915_fpc_optimize_mov_after_mul(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
+
+      i++;
+   }
+   tgsi_parse_free (&parse);
+
+   return out_tokens;
+}
+
+/**
+ * Release a token list and its token array.
+ *
+ * The memory is obtained with MALLOC() in this file, so it is released with
+ * the matching FREE() rather than libc free().  A NULL list is accepted and
+ * ignored.
+ *
+ * \param tokens  the list to release, may be NULL
+ */
+void i915_optimize_free(struct i915_token_list* tokens)
+{
+   if (!tokens)
+      return;
+
+   FREE(tokens->Tokens);
+   FREE(tokens);
+}
+
+
index df606e2..e19d9be 100644 (file)
@@ -172,7 +172,7 @@ static uint get_mapping(struct i915_fragment_shader* fs, int unit)
  */
 static uint
 src_vector(struct i915_fp_compile *p,
-           const struct tgsi_full_src_register *source,
+           const struct i915_full_src_register *source,
            struct i915_fragment_shader* fs)
 {
    uint index = source->Register.Index;
@@ -287,7 +287,7 @@ src_vector(struct i915_fp_compile *p,
  */
 static uint
 get_result_vector(struct i915_fp_compile *p,
-                  const struct tgsi_full_dst_register *dest)
+                  const struct i915_full_dst_register *dest)
 {
    switch (dest->Register.File) {
    case TGSI_FILE_OUTPUT:
@@ -316,7 +316,7 @@ get_result_vector(struct i915_fp_compile *p,
  * Compute flags for saturation and writemask.
  */
 static uint
-get_result_flags(const struct tgsi_full_instruction *inst)
+get_result_flags(const struct i915_full_instruction *inst)
 {
    const uint writeMask
       = inst->Dst[0].Register.WriteMask;
@@ -378,7 +378,7 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex)
  */
 static void
 emit_tex(struct i915_fp_compile *p,
-         const struct tgsi_full_instruction *inst,
+         const struct i915_full_instruction *inst,
          uint opcode,
          struct i915_fragment_shader* fs)
 {
@@ -404,7 +404,7 @@ emit_tex(struct i915_fp_compile *p,
  */
 static void
 emit_simple_arith(struct i915_fp_compile *p,
-                  const struct tgsi_full_instruction *inst,
+                  const struct i915_full_instruction *inst,
                   uint opcode, uint numArgs,
                   struct i915_fragment_shader* fs)
 {
@@ -429,11 +429,11 @@ emit_simple_arith(struct i915_fp_compile *p,
 /** As above, but swap the first two src regs */
 static void
 emit_simple_arith_swap2(struct i915_fp_compile *p,
-                        const struct tgsi_full_instruction *inst,
+                        const struct i915_full_instruction *inst,
                         uint opcode, uint numArgs,
                         struct i915_fragment_shader* fs)
 {
-   struct tgsi_full_instruction inst2;
+   struct i915_full_instruction inst2;
 
    assert(numArgs == 2);
 
@@ -457,7 +457,7 @@ emit_simple_arith_swap2(struct i915_fp_compile *p,
  */ 
 static void
 i915_translate_instruction(struct i915_fp_compile *p,
-                           const struct tgsi_full_instruction *inst,
+                           const struct i915_full_instruction *inst,
                            struct i915_fragment_shader *fs)
 {
    uint writemask;
@@ -728,6 +728,9 @@ i915_translate_instruction(struct i915_fp_compile *p,
       emit_simple_arith(p, inst, A0_MUL, 2, fs);
       break;
 
+   case TGSI_OPCODE_NOP:
+      break;
+
    case TGSI_OPCODE_POW:
       src0 = src_vector(p, &inst->Src[0], fs);
       src1 = src_vector(p, &inst->Src[1], fs);
@@ -1044,107 +1047,107 @@ i915_translate_instruction(struct i915_fp_compile *p,
 }
 
 
-/**
- * Translate TGSI fragment shader into i915 hardware instructions.
- * \param p  the translation state
- * \param tokens  the TGSI token array
- */
-static void
-i915_translate_instructions(struct i915_fp_compile *p,
-                            const struct tgsi_token *tokens,
-                            struct i915_fragment_shader *fs)
+static void i915_translate_token(struct i915_fp_compile *p,
+                                 const union i915_full_token* token,
+                                 struct i915_fragment_shader *fs)
 {
    struct i915_fragment_shader *ifs = p->shader;
-   struct tgsi_parse_context parse;
-
-   tgsi_parse_init( &parse, tokens );
-
-   while( !tgsi_parse_end_of_tokens( &parse ) ) {
-
-      tgsi_parse_token( &parse );
+   switch( token->Token.Type ) {
+   case TGSI_TOKEN_TYPE_PROPERTY:
+      /*
+       * We only support one cbuf, but we still need to ignore the property
+       * correctly so we don't hit the assert at the end of the switch case.
+       */
+      assert(token->FullProperty.Property.PropertyName ==
+             TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS);
+      break;
 
-      switch( parse.FullToken.Token.Type ) {
-      case TGSI_TOKEN_TYPE_PROPERTY:
-         /*
-          * We only support one cbuf, but we still need to ignore the property
-          * correctly so we don't hit the assert at the end of the switch case.
-          */
-         assert(parse.FullToken.FullProperty.Property.PropertyName ==
-                TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS);
-         break;
-      case TGSI_TOKEN_TYPE_DECLARATION:
-         if (parse.FullToken.FullDeclaration.Declaration.File
-                  == TGSI_FILE_CONSTANT) {
-            uint i;
-            for (i = parse.FullToken.FullDeclaration.Range.First;
-                 i <= parse.FullToken.FullDeclaration.Range.Last;
-                 i++) {
-               assert(ifs->constant_flags[i] == 0x0);
-               ifs->constant_flags[i] = I915_CONSTFLAG_USER;
-               ifs->num_constants = MAX2(ifs->num_constants, i + 1);
-            }
+   case TGSI_TOKEN_TYPE_DECLARATION:
+      if (token->FullDeclaration.Declaration.File
+               == TGSI_FILE_CONSTANT) {
+         uint i;
+         for (i = token->FullDeclaration.Range.First;
+              i <= token->FullDeclaration.Range.Last;
+              i++) {
+            assert(ifs->constant_flags[i] == 0x0);
+            ifs->constant_flags[i] = I915_CONSTFLAG_USER;
+            ifs->num_constants = MAX2(ifs->num_constants, i + 1);
          }
-         else if (parse.FullToken.FullDeclaration.Declaration.File
-                  == TGSI_FILE_TEMPORARY) {
-            uint i;
-            for (i = parse.FullToken.FullDeclaration.Range.First;
-                 i <= parse.FullToken.FullDeclaration.Range.Last;
-                 i++) {
-               if (i >= I915_MAX_TEMPORARY)
-                  debug_printf("Too many temps (%d)\n",i);
-              else
-                  /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
-                  p->temp_flag |= (1 << i); /* mark temp as used */
-            }
+      }
+      else if (token->FullDeclaration.Declaration.File
+               == TGSI_FILE_TEMPORARY) {
+         uint i;
+         for (i = token->FullDeclaration.Range.First;
+              i <= token->FullDeclaration.Range.Last;
+              i++) {
+            if (i >= I915_MAX_TEMPORARY)
+               debug_printf("Too many temps (%d)\n",i);
+            else
+               /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
+               p->temp_flag |= (1 << i); /* mark temp as used */
          }
-         break;
+      }
+      break;
 
-      case TGSI_TOKEN_TYPE_IMMEDIATE:
-         {
-            const struct tgsi_full_immediate *imm
-               = &parse.FullToken.FullImmediate;
-            const uint pos = p->num_immediates++;
-            uint j;
-            assert( imm->Immediate.NrTokens <= 4 + 1 );
-            for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
-               p->immediates[pos][j] = imm->u[j].Float;
-            }
+   case TGSI_TOKEN_TYPE_IMMEDIATE:
+      {
+         const struct tgsi_full_immediate *imm
+            = &token->FullImmediate;
+         const uint pos = p->num_immediates++;
+         uint j;
+         assert( imm->Immediate.NrTokens <= 4 + 1 );
+         for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
+            p->immediates[pos][j] = imm->u[j].Float;
          }
-         break;
+      }
+      break;
 
-      case TGSI_TOKEN_TYPE_INSTRUCTION:
-         if (p->first_instruction) {
-            /* resolve location of immediates */
-            uint i, j;
-            for (i = 0; i < p->num_immediates; i++) {
-               /* find constant slot for this immediate */
-               for (j = 0; j < I915_MAX_CONSTANT; j++) {
-                  if (ifs->constant_flags[j] == 0x0) {
-                     memcpy(ifs->constants[j],
-                            p->immediates[i],
-                            4 * sizeof(float));
-                     /*printf("immediate %d maps to const %d\n", i, j);*/
-                     ifs->constant_flags[j] = 0xf;  /* all four comps used */
-                     p->immediates_map[i] = j;
-                     ifs->num_constants = MAX2(ifs->num_constants, j + 1);
-                     break;
-                  }
+   case TGSI_TOKEN_TYPE_INSTRUCTION:
+      if (p->first_instruction) {
+         /* resolve location of immediates */
+         uint i, j;
+         for (i = 0; i < p->num_immediates; i++) {
+            /* find constant slot for this immediate */
+            for (j = 0; j < I915_MAX_CONSTANT; j++) {
+               if (ifs->constant_flags[j] == 0x0) {
+                  memcpy(ifs->constants[j],
+                         p->immediates[i],
+                         4 * sizeof(float));
+                  /*printf("immediate %d maps to const %d\n", i, j);*/
+                  ifs->constant_flags[j] = 0xf;  /* all four comps used */
+                  p->immediates_map[i] = j;
+                  ifs->num_constants = MAX2(ifs->num_constants, j + 1);
+                  break;
                }
             }
-
-            p->first_instruction = FALSE;
          }
 
-         i915_translate_instruction(p, &parse.FullToken.FullInstruction, fs);
-         break;
-
-      default:
-         assert( 0 );
+         p->first_instruction = FALSE;
       }
 
-   } /* while */
+      i915_translate_instruction(p, &token->FullInstruction, fs);
+      break;
+
+   default:
+      assert( 0 );
+   }
 
-   tgsi_parse_free (&parse);
+}
+
+/**
+ * Translate an i915 token list into i915 hardware instructions.
+ * Each token is handed to i915_translate_token() in list order.
+ * \param p  the translation state
+ * \param tokens  the i915 token list; a NULL list is reported through
+ *                i915_program_error() and nothing is translated
+ * \param fs  the fragment shader, passed through to i915_translate_token()
+ */
+static void
+i915_translate_instructions(struct i915_fp_compile *p,
+                            const struct i915_token_list *tokens,
+                            struct i915_fragment_shader *fs)
+{
+   unsigned i;   /* unsigned to match tokens->NumTokens */
+
+   if (!tokens) {
+      i915_program_error(p, "no token list to translate");
+      return;
+   }
+
+   for(i = 0; i<tokens->NumTokens; i++) {
+      i915_translate_token(p, &tokens->Tokens[i], fs);
+   }
 }
 
 
@@ -1303,8 +1306,10 @@ i915_translate_fragment_program( struct i915_context *i915,
 
    p = i915_init_compile(i915, fs);
 
-   i915_translate_instructions(p, tokens, fs);
+   struct i915_token_list* i_tokens = i915_optimize(tokens);
+   i915_translate_instructions(p, i_tokens, fs);
    i915_fixup_depth_write(p);
 
    i915_fini_compile(i915, p);
+   i915_optimize_free(i_tokens);
 }