llvmpipe: Code generate interpolators.
author José Fonseca <jfonseca@vmware.com>
Fri, 14 Aug 2009 09:03:46 +0000 (10:03 +0100)
committer José Fonseca <jfonseca@vmware.com>
Sat, 29 Aug 2009 08:21:32 +0000 (09:21 +0100)
src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
src/gallium/drivers/llvmpipe/lp_fs_llvm.c

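diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
index 020db00..eb50462 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h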
@@ -44,7 +44,10 @@ void
 lp_build_tgsi_soa(LLVMBuilderRef builder,
                   const struct tgsi_token *tokens,
                   union lp_type type,
-                  LLVMValueRef (*inputs)[4],
+                  LLVMValueRef *pos,
+                  LLVMValueRef a0_ptr,
+                  LLVMValueRef dadx_ptr,
+                  LLVMValueRef dady_ptr,
                   LLVMValueRef consts_ptr,
                   LLVMValueRef (*outputs)[4],
                   LLVMValueRef samplers_ptr);
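diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index c9143eb..1f489a3 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c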
@@ -69,13 +69,21 @@ struct lp_build_tgsi_soa_context
 {
    struct lp_build_context base;
 
-   LLVMValueRef (*inputs)[4];
+   LLVMValueRef x, y, w;
+   LLVMValueRef a0_ptr;
+   LLVMValueRef dadx_ptr;
+   LLVMValueRef dady_ptr;
+
    LLVMValueRef consts_ptr;
-   LLVMValueRef (*outputs)[4];
+   LLVMValueRef (*outputs)[NUM_CHANNELS];
    LLVMValueRef samplers_ptr;
 
-   LLVMValueRef immediates[LP_MAX_IMMEDIATES][4];
-   LLVMValueRef temps[LP_MAX_TEMPS][4];
+   LLVMValueRef oow;
+
+   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+
+   LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
+   LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
 
    /** Coords/texels store */
    LLVMValueRef store_ptr;
@@ -1339,48 +1347,70 @@ emit_declaration(
    struct lp_build_tgsi_soa_context *bld,
    struct tgsi_full_declaration *decl )
 {
-#if 0
    if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+      LLVMBuilderRef builder = bld->base.builder;
       unsigned first, last, mask;
-      unsigned i, j;
-      LLVMValueRef tmp;
+      unsigned attrib, chan;
 
       first = decl->DeclarationRange.First;
       last = decl->DeclarationRange.Last;
       mask = decl->Declaration.UsageMask;
 
-      for( i = first; i <= last; i++ ) {
-         for( j = 0; j < NUM_CHANNELS; j++ ) {
-            if( mask & (1 << j) ) {
-               switch( decl->Declaration.Interpolate ) {
-               case TGSI_INTERPOLATE_CONSTANT:
-                  bld->inputs[i][j] = bld->interp_coefs[i].a0[j];
-                  break;
+      for( attrib = first; attrib <= last; attrib++ ) {
+         for( chan = 0; chan < NUM_CHANNELS; chan++ ) {
+            LLVMValueRef input = bld->base.undef; /* stays undef for channels outside the usage mask */
 
-               case TGSI_INTERPOLATE_LINEAR:
-                  tmp = bld->interp_coefs[i].a0[j];
-                  tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[0], bld->interp_coefs[i].dadx[j]));
-                  tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[1], bld->interp_coefs[i].dady[j]));
-                  bld->inputs[i][j] = tmp;
-                  break;
+            if( mask & (1 << chan) ) {
+               LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
+               LLVMValueRef a0 = bld->base.undef;   /* never read uninitialized, even if the switch hits default */
+               LLVMValueRef dadx = bld->base.undef;
+               LLVMValueRef dady = bld->base.undef;
+               char name[32];
 
+               switch( decl->Declaration.Interpolate ) {
                case TGSI_INTERPOLATE_PERSPECTIVE:
-                  tmp = bld->interp_coefs[i].a0[j];
-                  tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[0], bld->interp_coefs[i].dadx[j]));
-                  tmp = lp_build_add(&bld->base, tmp, lp_build_mul(&bld->base, bld->pos[1], bld->interp_coefs[i].dady[j]));
-                  tmp = lp_build_div(&bld->base, tmp, bld->pos[3]);
-                  bld->inputs[i][j] = tmp;
+               case TGSI_INTERPOLATE_LINEAR: {
+                  LLVMValueRef dadx_ptr = LLVMBuildGEP(builder, bld->dadx_ptr, &index, 1, "");
+                  LLVMValueRef dady_ptr = LLVMBuildGEP(builder, bld->dady_ptr, &index, 1, "");
+                  util_snprintf(name, sizeof name, "dadx_%u.%c", attrib, "xyzw"[chan]);
+                  dadx = LLVMBuildLoad(builder, dadx_ptr, name);
+                  util_snprintf(name, sizeof name, "dady_%u.%c", attrib, "xyzw"[chan]);
+                  dady = LLVMBuildLoad(builder, dady_ptr, name);
+               }
+                  /* fall through: linear and perspective inputs need a0 as well */
+               case TGSI_INTERPOLATE_CONSTANT: {
+                  LLVMValueRef a0_ptr = LLVMBuildGEP(builder, bld->a0_ptr, &index, 1, "");
+                  util_snprintf(name, sizeof name, "a0_%u.%c", attrib, "xyzw"[chan]);
+                  a0 = LLVMBuildLoad(builder, a0_ptr, name);
                   break;
+               }
 
                default:
-                  assert( 0 );
-                 break;
+                  assert(0); /* unknown mode: coefficients keep their undef initializers */
+                  break;
+               }
+               /* input = a0 + x*dadx + y*dady; perspective inputs are then scaled by 1/w */
+               input = a0;
+
+               if (decl->Declaration.Interpolate != TGSI_INTERPOLATE_CONSTANT) {
+                  input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->x, dadx));
+                  input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->y, dady));
                }
+
+               if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
+                  if(!bld->oow) /* 1/w is built on first use and cached in the context */
+                     bld->oow = lp_build_rcp(&bld->base, bld->w);
+                  input = lp_build_mul(&bld->base, input, bld->oow);
+               }
+
+               util_snprintf(name, sizeof name, "input%u.%c", attrib, "xyzw"[chan]);
+               LLVMSetValueName(input, name);
             }
+
+            bld->inputs[attrib][chan] = input;
          }
       }
    }
-#endif
 }
 
 /**
@@ -1396,7 +1426,10 @@ void
 lp_build_tgsi_soa(LLVMBuilderRef builder,
                   const struct tgsi_token *tokens,
                   union lp_type type,
-                  LLVMValueRef (*inputs)[4],
+                  LLVMValueRef *pos,
+                  LLVMValueRef a0_ptr,
+                  LLVMValueRef dadx_ptr,
+                  LLVMValueRef dady_ptr,
                   LLVMValueRef consts_ptr,
                   LLVMValueRef (*outputs)[4],
                   LLVMValueRef samplers_ptr)
@@ -1409,7 +1442,12 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
    /* Setup build context */
    memset(&bld, 0, sizeof bld);
    lp_build_context_init(&bld.base, builder, type);
-   bld.inputs = inputs;
+   bld.x = pos[0];
+   bld.y = pos[1];
+   bld.w = pos[3];
+   bld.a0_ptr = a0_ptr;
+   bld.dadx_ptr = dadx_ptr;
+   bld.dady_ptr = dady_ptr;
    bld.outputs = outputs;
    bld.consts_ptr = consts_ptr;
    bld.samplers_ptr = samplers_ptr;
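diff --git a/src/gallium/drivers/llvmpipe/lp_fs_llvm.c b/src/gallium/drivers/llvmpipe/lp_fs_llvm.c
index 2c1a849..ef1c8c3 100644
--- a/src/gallium/drivers/llvmpipe/lp_fs_llvm.c
+++ b/src/gallium/drivers/llvmpipe/lp_fs_llvm.c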
 
 
 typedef void
-(*lp_shader_fs_func)(void *inputs,
+(*lp_shader_fs_func)(void *pos,
+                     void *a0,
+                     void *dadx,
+                     void *dady,
                      void *consts,
                      void *outputs,
                      struct tgsi_sampler **samplers);
@@ -65,6 +68,13 @@ struct lp_llvm_fragment_shader
    LLVMValueRef function;
 
    lp_shader_fs_func jit_function;
+
+   union tgsi_exec_channel ALIGN16_ATTRIB pos[NUM_CHANNELS];
+   union tgsi_exec_channel ALIGN16_ATTRIB a0[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   union tgsi_exec_channel ALIGN16_ATTRIB dadx[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   union tgsi_exec_channel ALIGN16_ATTRIB dady[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+
+   uint32_t magic;
 };
 
 
@@ -84,15 +94,19 @@ shader_generate(struct llvmpipe_screen *screen,
    union lp_type type;
    LLVMTypeRef elem_type;
    LLVMTypeRef vec_type;
-   LLVMTypeRef args[4];
-   LLVMValueRef inputs_ptr;
+   LLVMTypeRef arg_types[7];
+   LLVMTypeRef func_type;
+   LLVMValueRef pos_ptr;
+   LLVMValueRef a0_ptr;
+   LLVMValueRef dadx_ptr;
+   LLVMValueRef dady_ptr;
    LLVMValueRef consts_ptr;
    LLVMValueRef outputs_ptr;
    LLVMValueRef samplers_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
-   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][4];
-   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][4];
+   LLVMValueRef pos[NUM_CHANNELS];
+   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
    char name[32];
    unsigned i, j;
 
@@ -106,19 +120,31 @@ shader_generate(struct llvmpipe_screen *screen,
    elem_type = lp_build_elem_type(type);
    vec_type = lp_build_vec_type(type);
 
-   args[0] = LLVMPointerType(vec_type, 0);
-   args[1] = LLVMPointerType(elem_type, 0);
-   args[2] = LLVMPointerType(vec_type, 0);
-   args[3] = LLVMPointerType(LLVMInt8Type(), 0);
-   shader->function = LLVMAddFunction(screen->module, "shader", LLVMFunctionType(LLVMVoidType(), args, 4, 0));
-   LLVMSetFunctionCallConv(shader->function, LLVMCCallConv);
+   arg_types[0] = LLVMPointerType(vec_type, 0);        /* pos */
+   arg_types[1] = LLVMPointerType(vec_type, 0);        /* a0 */
+   arg_types[2] = LLVMPointerType(vec_type, 0);        /* dadx */
+   arg_types[3] = LLVMPointerType(vec_type, 0);        /* dady */
+   arg_types[4] = LLVMPointerType(elem_type, 0);       /* consts */
+   arg_types[5] = LLVMPointerType(vec_type, 0);        /* outputs */
+   arg_types[6] = LLVMPointerType(LLVMInt8Type(), 0);  /* samplers */
+
+   func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
 
-   inputs_ptr = LLVMGetParam(shader->function, 0);
-   consts_ptr = LLVMGetParam(shader->function, 1);
-   outputs_ptr = LLVMGetParam(shader->function, 2);
-   samplers_ptr = LLVMGetParam(shader->function, 3);
+   shader->function = LLVMAddFunction(screen->module, "shader", func_type);
+   LLVMSetFunctionCallConv(shader->function, LLVMCCallConv);
 
-   LLVMSetValueName(inputs_ptr, "inputs");
+   pos_ptr = LLVMGetParam(shader->function, 0);
+   a0_ptr = LLVMGetParam(shader->function, 1);
+   dadx_ptr = LLVMGetParam(shader->function, 2);
+   dady_ptr = LLVMGetParam(shader->function, 3);
+   consts_ptr = LLVMGetParam(shader->function, 4);
+   outputs_ptr = LLVMGetParam(shader->function, 5);
+   samplers_ptr = LLVMGetParam(shader->function, 6);
+
+   LLVMSetValueName(pos_ptr, "pos");
+   LLVMSetValueName(a0_ptr, "a0");
+   LLVMSetValueName(dadx_ptr, "dadx");
+   LLVMSetValueName(dady_ptr, "dady");
    LLVMSetValueName(consts_ptr, "consts");
    LLVMSetValueName(outputs_ptr, "outputs");
    LLVMSetValueName(samplers_ptr, "samplers");
@@ -127,23 +153,23 @@ shader_generate(struct llvmpipe_screen *screen,
    builder = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(builder, block);
 
-   for(i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) {
-      for(j = 0; j < 4; ++j) {
-         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i*4 + j, 0);
-         util_snprintf(name, sizeof name, "input%u.%c", i, "xywz"[j]);
-         inputs[i][j] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, inputs_ptr, &index, 1, ""), name);
-      }
+   for(j = 0; j < NUM_CHANNELS; ++j) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), j, 0);
+      util_snprintf(name, sizeof name, "pos.%c", "xyzw"[j]);
+      pos[j] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, pos_ptr, &index, 1, ""), name);
    }
 
    memset(outputs, 0, sizeof outputs);
 
-   lp_build_tgsi_soa(builder, tokens, type, inputs, consts_ptr, outputs, samplers_ptr);
+   lp_build_tgsi_soa(builder, tokens, type,
+                     pos, a0_ptr, dadx_ptr, dady_ptr,
+                     consts_ptr, outputs, samplers_ptr);
 
    for(i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
-      for(j = 0; j < 4; ++j) {
+      for(j = 0; j < NUM_CHANNELS; ++j) {
          if(outputs[i][j]) {
-            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i*4 + j, 0);
-            util_snprintf(name, sizeof name, "output%u.%c", i, "xywz"[j]);
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i*NUM_CHANNELS + j, 0);
+            util_snprintf(name, sizeof name, "output%u.%c", i, "xyzw"[j]);
             LLVMBuildStore(builder, outputs[i][j], LLVMBuildGEP(builder, outputs_ptr, &index, 1, name));
          }
       }
@@ -175,131 +201,56 @@ fs_llvm_prepare( const struct lp_fragment_shader *base,
 
 
 
-
-/**
- * Evaluate a constant-valued coefficient at the position of the
- * current quad.
- */
 static void
-eval_constant_coef(
-   struct tgsi_exec_machine *mach,
-   unsigned attrib,
-   unsigned chan )
+setup_pos_vector(struct lp_llvm_fragment_shader *shader,
+                 const struct tgsi_interp_coef *coef,
+                 float x, float y)
 {
-   unsigned i;
-
-   for( i = 0; i < QUAD_SIZE; i++ ) {
-      mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
+   uint chan;
+
+   /* do X */
+   shader->pos[0].f[0] = x;
+   shader->pos[0].f[1] = x + 1;
+   shader->pos[0].f[2] = x;
+   shader->pos[0].f[3] = x + 1;
+
+   /* do Y */
+   shader->pos[1].f[0] = y;
+   shader->pos[1].f[1] = y;
+   shader->pos[1].f[2] = y + 1;
+   shader->pos[1].f[3] = y + 1;
+
+   /* do Z and W for all fragments in the quad */
+   for (chan = 2; chan < 4; chan++) {
+      const float dadx = coef->dadx[chan];
+      const float dady = coef->dady[chan];
+      const float a0 = coef->a0[chan] + dadx * x + dady * y;
+      shader->pos[chan].f[0] = a0;
+      shader->pos[chan].f[1] = a0 + dadx;
+      shader->pos[chan].f[2] = a0 + dady;
+      shader->pos[chan].f[3] = a0 + dadx + dady;
    }
 }
 
-/**
- * Evaluate a linear-valued coefficient at the position of the
- * current quad.
- */
-static void
-eval_linear_coef(
-   struct tgsi_exec_machine *mach,
-   unsigned attrib,
-   unsigned chan )
-{
-   const float x = mach->QuadPos.xyzw[0].f[0];
-   const float y = mach->QuadPos.xyzw[1].f[0];
-   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
-   const float dady = mach->InterpCoefs[attrib].dady[chan];
-   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
-   mach->Inputs[attrib].xyzw[chan].f[0] = a0;
-   mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
-   mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
-   mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
-}
-
-/**
- * Evaluate a perspective-valued coefficient at the position of the
- * current quad.
- */
-static void
-eval_perspective_coef(
-   struct tgsi_exec_machine *mach,
-   unsigned attrib,
-   unsigned chan )
-{
-   const float x = mach->QuadPos.xyzw[0].f[0];
-   const float y = mach->QuadPos.xyzw[1].f[0];
-   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
-   const float dady = mach->InterpCoefs[attrib].dady[chan];
-   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
-   const float *w = mach->QuadPos.xyzw[3].f;
-   /* divide by W here */
-   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
-   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
-   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
-   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
-}
-
-
-typedef void
-(*eval_coef_func)(struct tgsi_exec_machine *mach,
-                  unsigned attrib,
-                  unsigned chan );
-
 
 static void
-exec_declaration(
-   struct tgsi_exec_machine *mach,
-   const struct tgsi_full_declaration *decl )
+setup_coef_vector(struct lp_llvm_fragment_shader *shader,
+                  const struct tgsi_interp_coef *coef)
 {
-   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
-      if( decl->Declaration.File == TGSI_FILE_INPUT ) {
-         unsigned first, last, mask;
-         eval_coef_func eval;
-
-         first = decl->DeclarationRange.First;
-         last = decl->DeclarationRange.Last;
-         mask = decl->Declaration.UsageMask;
-
-         switch( decl->Declaration.Interpolate ) {
-         case TGSI_INTERPOLATE_CONSTANT:
-            eval = eval_constant_coef;
-            break;
-
-         case TGSI_INTERPOLATE_LINEAR:
-            eval = eval_linear_coef;
-            break;
-
-         case TGSI_INTERPOLATE_PERSPECTIVE:
-            eval = eval_perspective_coef;
-            break;
-
-         default:
-            eval = NULL;
-            assert( 0 );
-         }
-
-         if( mask == TGSI_WRITEMASK_XYZW ) {
-            unsigned i, j;
-
-            for( i = first; i <= last; i++ ) {
-               for( j = 0; j < NUM_CHANNELS; j++ ) {
-                  eval( mach, i, j );
-               }
-            }
-         }
-         else {
-            unsigned i, j;
-
-            for( j = 0; j < NUM_CHANNELS; j++ ) {
-               if( mask & (1 << j) ) {
-                  for( i = first; i <= last; i++ ) {
-                     eval( mach, i, j );
-                  }
-               }
-            }
+   unsigned attrib, chan, i;
+
+   for (attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; ++attrib) {
+      for (chan = 0; chan < NUM_CHANNELS; ++chan) {
+         for( i = 0; i < QUAD_SIZE; ++i ) {
+            shader->a0[attrib][chan].f[i] = coef[attrib].a0[chan];
+            shader->dadx[attrib][chan].f[i] = coef[attrib].dadx[chan];
+            shader->dady[attrib][chan].f[i] = coef[attrib].dady[chan];
          }
       }
    }
 }
 
+
 /* TODO: codegenerate the whole run function, skip this wrapper.
  * TODO: break dependency on tgsi_exec_machine struct
  * TODO: push Position calculation into the generated shader
@@ -311,25 +262,24 @@ fs_llvm_run( const struct lp_fragment_shader *base,
            struct quad_header *quad )
 {
    struct lp_llvm_fragment_shader *shader = lp_llvm_fragment_shader(base);
-   unsigned i;
    unsigned mask;
 
    /* Compute X, Y, Z, W vals for this quad */
-   lp_setup_pos_vector(quad->posCoef, 
-                       (float)quad->input.x0, (float)quad->input.y0,
-                       &machine->QuadPos);
+   setup_pos_vector(shader,
+                    quad->posCoef,
+                   (float)quad->input.x0, (float)quad->input.y0);
+
+   setup_coef_vector(shader,
+                     quad->coef);
 
    /* init kill mask */
    tgsi_set_kill_mask(machine, 0x0);
    tgsi_set_exec_mask(machine, 1, 1, 1, 1);
 
-   /* execute declarations (interpolants) */
-   for (i = 0; i < machine->NumDeclarations; i++)
-      exec_declaration( machine, &machine->Declarations[i] );
-
    memset(machine->Outputs, 0, sizeof machine->Outputs);
 
-   shader->jit_function( machine->Inputs,
+   shader->jit_function( shader->pos,
+                         shader->a0, shader->dadx, shader->dady,
                          machine->Consts,
                          machine->Outputs,
                          machine->Samplers);