draw: enable FSE by default
author Keith Whitwell <keith@tungstengraphics.com>
Wed, 28 May 2008 22:54:18 +0000 (23:54 +0100)
committer Keith Whitwell <keith@tungstengraphics.com>
Wed, 28 May 2008 22:54:18 +0000 (23:54 +0100)
src/gallium/auxiliary/draw/draw_pt.c
src/gallium/auxiliary/draw/draw_vs.h
src/gallium/auxiliary/draw/draw_vs_aos.c
src/gallium/auxiliary/draw/draw_vs_aos.h
src/gallium/auxiliary/draw/draw_vs_aos_io.c
src/gallium/auxiliary/draw/draw_vs_sse.c

index 75f44d5..d48c6c2 100644 (file)
@@ -75,7 +75,7 @@ draw_pt_arrays(struct draw_context *draw,
 
    if (opt == 0) 
       middle = draw->pt.middle.fetch_emit;
-   else if (opt == PT_SHADE && draw->pt.test_fse)
+   else if (opt == PT_SHADE)
       middle = draw->pt.middle.fetch_shade_emit;
    else
       middle = draw->pt.middle.general;
@@ -118,12 +118,9 @@ boolean draw_pt_init( struct draw_context *draw )
    if (!draw->pt.middle.fetch_emit)
       return FALSE;
 
-   if (draw->pt.test_fse) {
-      draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
-      if (!draw->pt.middle.fetch_shade_emit)
-         return FALSE;
-   }
-
+   draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
+   if (!draw->pt.middle.fetch_shade_emit)
+      return FALSE;
 
    draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
    if (!draw->pt.middle.general)
index 01171bc..7aa0415 100644 (file)
@@ -123,6 +123,10 @@ struct draw_vertex_shader {
 
    struct tgsi_shader_info info;
 
+   /* Extracted from shader:
+    */
+   const float (*immediates)[4];
+
    /* 
     */
    struct draw_vs_varient *varient[16];
index 0cd82ff..9056785 100644 (file)
@@ -66,6 +66,37 @@ static INLINE boolean eq( struct x86_reg a,
           a.disp == b.disp);
 }
       
+struct x86_reg aos_get_x86( struct aos_compilation *cp,
+                            unsigned value )   /* value: one of X86_* */
+{  /* Returns cp->temp_EBP; emits an x86_mov into it first unless cp->ebp already equals 'value'. */
+   if (cp->ebp != value) {
+      unsigned offset;   /* Offset() of the selected field within struct aos_machine */
+
+      switch (value) {
+      case X86_IMMEDIATES:
+         offset = Offset(struct aos_machine, immediates);
+         break;
+      case X86_CONSTANTS:
+         offset = Offset(struct aos_machine, constants);
+         break;
+      case X86_ATTRIBS:
+         offset = Offset(struct aos_machine, attrib);
+         break;
+      default:
+         assert(0);   /* unexpected selector; if the assert is compiled out, offset 0 is used */
+         offset = 0;
+      }
+
+      x86_mov(cp->func, cp->temp_EBP, 
+              x86_make_disp(cp->machine_EDX, offset));
+      /* NOTE(review): presumably loads the pointer stored at machine_EDX + offset into temp_EBP -- confirm x86_make_disp semantics */
+
+      cp->ebp = value;   /* remember the selector so an identical follow-up request skips the mov */
+   }
+
+   return cp->temp_EBP;
+}
+
 
 static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
                                   unsigned file,
@@ -83,15 +114,15 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
    case TGSI_FILE_TEMPORARY:
       return x86_make_disp(ptr, Offset(struct aos_machine, temp[idx]));
 
-   case TGSI_FILE_IMMEDIATE:
-      return x86_make_disp(ptr, Offset(struct aos_machine, immediate[idx]));
-
-   case TGSI_FILE_CONSTANT:       
-      return x86_make_disp(ptr, Offset(struct aos_machine, constant[idx]));
-
    case AOS_FILE_INTERNAL:
       return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx]));
 
+   case TGSI_FILE_IMMEDIATE: 
+      return x86_make_disp(aos_get_x86(cp, X86_IMMEDIATES), idx * 4 * sizeof(float));
+
+   case TGSI_FILE_CONSTANT: 
+      return x86_make_disp(aos_get_x86(cp, X86_CONSTANTS), idx * 4 * sizeof(float));
+
    default:
       ERROR(cp, "unknown reg file");
       return x86_make_reg(0,0);
@@ -1865,6 +1896,7 @@ static boolean emit_rhw_viewport( struct aos_compilation *cp )
 }
 
 
+#if 0
 static boolean note_immediate( struct aos_compilation *cp,
                                struct tgsi_full_immediate *imm )
 {
@@ -1877,6 +1909,7 @@ static boolean note_immediate( struct aos_compilation *cp,
 
    return TRUE;
 }
+#endif
 
 
 
@@ -1939,6 +1972,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
    cp.outbuf_ECX    = x86_make_reg(file_REG32, reg_CX);
    cp.machine_EDX   = x86_make_reg(file_REG32, reg_DX);
    cp.count_ESI     = x86_make_reg(file_REG32, reg_SI);
+   cp.temp_EBP     = x86_make_reg(file_REG32, reg_BP);
 
    x86_init_func(cp.func);
 
@@ -1946,6 +1980,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
 
    x86_push(cp.func, cp.idx_EBX);
    x86_push(cp.func, cp.count_ESI);
+   x86_push(cp.func, cp.temp_EBP);
 
 
    /* Load arguments into regs:
@@ -1988,8 +2023,10 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
 
          switch (parse.FullToken.Token.Type) {
          case TGSI_TOKEN_TYPE_IMMEDIATE:
+#if 0
             if (!note_immediate( &cp, &parse.FullToken.FullImmediate ))
                goto fail;
+#endif
             break;
 
          case TGSI_TOKEN_TYPE_INSTRUCTION:
@@ -2072,6 +2109,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
    if (cp.func->need_emms)
       mmx_emms(cp.func);
 
+   x86_pop(cp.func, cp.temp_EBP);
    x86_pop(cp.func, cp.count_ESI);
    x86_pop(cp.func, cp.idx_EBX);
 
@@ -2098,26 +2136,14 @@ static void vaos_set_buffer( struct draw_vs_varient *varient,
 
    for (i = 0; i < vaos->base.key.nr_inputs; i++) {
       if (vaos->base.key.element[i].in.buffer == buf) {
-         vaos->machine->attrib[i].input_ptr = ((char *)ptr +
-                                               vaos->base.key.element[i].in.offset);
-         vaos->machine->attrib[i].input_stride = stride;
+         vaos->attrib[i].input_ptr = ((char *)ptr +
+                                      vaos->base.key.element[i].in.offset);
+         vaos->attrib[i].input_stride = stride;
       }
    }
 }
 
 
-static void vaos_destroy( struct draw_vs_varient *varient )
-{
-   struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
-
-   if (vaos->machine)
-      align_free( vaos->machine );
-
-   x86_release_func( &vaos->func[0] );
-   x86_release_func( &vaos->func[1] );
-
-   FREE(vaos);
-}
 
 static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
                                       const unsigned *elts,
@@ -2127,6 +2153,10 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
    struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
 
    vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
+   vaos->machine->constants = vaos->draw->pt.user.constants;
+   vaos->machine->immediates = vaos->base.vs->immediates;
+   vaos->machine->attrib = vaos->attrib;
+
    vaos->gen_run_elts( varient,
                        elts,
                        count,
@@ -2141,6 +2171,10 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
    struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
 
    vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
+   vaos->machine->constants = vaos->draw->pt.user.constants;
+   vaos->machine->immediates = vaos->base.vs->immediates;
+   vaos->machine->attrib = vaos->attrib;
+
    vaos->gen_run_linear( varient,
                          start,
                          count,
@@ -2153,10 +2187,6 @@ static void vaos_set_constants( struct draw_vs_varient *varient,
 {
    struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
 
-   memcpy(vaos->machine->constant,
-          constants,
-          (vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1) * 4 * sizeof(float));
-
 #if 0
    unsigned i;
    for (i =0; i < vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1; i++)
@@ -2187,6 +2217,21 @@ static void vaos_set_viewport( struct draw_vs_varient *varient,
    memcpy(vaos->machine->translate, viewport->translate, 4 * sizeof(float));
 }
 
+static void vaos_destroy( struct draw_vs_varient *varient )
+{  /* Frees the varient's machine, its attrib array, both generated functions, then the varient itself. */
+   struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+
+   if (vaos->machine)
+      align_free( vaos->machine );   /* pairs with the align_malloc in varient_aos_sse */
+
+   FREE( vaos->attrib );   /* pairs with the MALLOC in varient_aos_sse */
+
+   x86_release_func( &vaos->func[0] );
+   x86_release_func( &vaos->func[1] );
+
+   FREE(vaos);   /* last: every access through vaos must happen before this */
+}
+
 
 
 static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
@@ -2207,6 +2252,11 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
    vaos->base.run_elts = vaos_run_elts;
 
    vaos->draw = vs->draw;
+
+   vaos->attrib = MALLOC( key->nr_inputs * sizeof(vaos->attrib[0]) );
+   if (!vaos->attrib)
+      goto fail;
+
    vaos->machine = align_malloc( sizeof(struct aos_machine), 16 );
    if (!vaos->machine)
       goto fail;
@@ -2233,7 +2283,10 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
    return &vaos->base;
 
  fail:
-   if (vaos->machine)
+   if (vaos && vaos->attrib)
+      FREE(vaos->attrib);
+
+   if (vaos && vaos->machine)
       align_free( vaos->machine );
 
    if (vaos)
index 837b327..295d2cb 100644 (file)
@@ -78,6 +78,14 @@ struct lit_info {
 #define MAX_SHINE_TAB    4
 #define MAX_LIT_INFO     16
 
+struct aos_attrib {   /* fetch state for one vertex input; filled in by vaos_set_buffer */
+   const void *input_ptr;   /* buffer pointer plus the element's in.offset */
+   unsigned input_stride;   /* stride given to vaos_set_buffer; get_src_ptr multiplies it by the element operand */
+};
+
+
+
+
 /* This is the temporary storage used by all the aos_sse vs varients.
  * Create one per context and reuse by passing a pointer in at
  * vs_varient creation??
@@ -86,8 +94,6 @@ struct aos_machine {
    float input    [MAX_INPUTS    ][4];
    float output   [MAX_OUTPUTS   ][4];
    float temp     [MAX_TEMPS     ][4];
-   float constant [MAX_CONSTANTS ][4]; /* fixme -- should just be a pointer */
-   float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */
    float internal [MAX_INTERNALS ][4];
 
    float scale[4];              /* viewport */
@@ -105,12 +111,10 @@ struct aos_machine {
    ushort fpu_restore;
    ushort fpucntl;              /* one of FPU_* above */
 
-   struct {
-      const void *input_ptr;
-      unsigned input_stride;
+   const float (*immediates)[4];     /* points to shader data */
+   const float (*constants)[4];      /* points to draw data */
 
-      unsigned output_offset;
-   } attrib[PIPE_MAX_ATTRIBS];
+   const struct aos_attrib *attrib; /* points to ? */
 };
 
 
@@ -132,6 +136,7 @@ struct aos_compilation {
       unsigned last_used;
    } xmm[8];
 
+   unsigned ebp;                /* one of X86_* */
 
    boolean input_fetched[PIPE_MAX_ATTRIBS];
    unsigned output_last_write[PIPE_MAX_ATTRIBS];
@@ -148,6 +153,7 @@ struct aos_compilation {
    struct x86_reg outbuf_ECX;
    struct x86_reg machine_EDX;
    struct x86_reg count_ESI;    /* decrements to zero */
+   struct x86_reg temp_EBP;
 };
 
 struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp );
@@ -192,20 +198,20 @@ do {                                                                    \
 } while (0)
 
 
+#define X86_NULL       0   /* NOTE(review): presumably "EBP holds no tracked pointer" -- confirm against cp->ebp initialisation */
+#define X86_IMMEDIATES 1   /* aos_get_x86 selector for aos_machine.immediates */
+#define X86_CONSTANTS  2   /* aos_get_x86 selector for aos_machine.constants */
+#define X86_ATTRIBS    3   /* aos_get_x86 selector for aos_machine.attrib */
 
-
+struct x86_reg aos_get_x86( struct aos_compilation *cp,
+                            unsigned value );   /* value: one of the X86_* selectors; defined in draw_vs_aos.c */
 
 
 struct draw_vs_varient_aos_sse {
    struct draw_vs_varient base;
    struct draw_context *draw;
 
-#if 0
-   struct {
-      const void *ptr;
-      unsigned stride;
-   } attrib[PIPE_MAX_ATTRIBS];
-#endif
+   struct aos_attrib *attrib;
 
    struct aos_machine *machine; /* XXX: temporarily unshared */
 
index 836110f..45e2092 100644 (file)
@@ -91,25 +91,25 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
 
 
 
-static void get_src_ptr( struct x86_function *func,
+static void get_src_ptr( struct aos_compilation *cp,   /* supplies cp->func and is passed on to aos_get_x86 */
                          struct x86_reg src,
-                         struct x86_reg machine,
                          struct x86_reg elt,
                          unsigned a )
 {
-   struct x86_reg input_ptr = 
-      x86_make_disp(machine, 
-                   Offset(struct aos_machine, attrib[a].input_ptr));
+   struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, X86_ATTRIBS ), 
+                                         a * sizeof(struct aos_attrib));   /* entry 'a' of the aos_attrib array */
 
-   struct x86_reg input_stride = 
-      x86_make_disp(machine, 
-                   Offset(struct aos_machine, attrib[a].input_stride));
+   struct x86_reg input_ptr = x86_make_disp(attrib, 
+                                            Offset(struct aos_attrib, input_ptr));   /* that entry's input_ptr field */
+
+   struct x86_reg input_stride = x86_make_disp(attrib, 
+                                               Offset(struct aos_attrib, input_stride));   /* that entry's input_stride field */
 
    /* Calculate pointer to current attrib:
     */
-   x86_mov(func, src, input_stride);
-   x86_imul(func, src, elt);
-   x86_add(func, src, input_ptr);
+   x86_mov(cp->func, src, input_stride);   /* src  = input_stride */
+   x86_imul(cp->func, src, elt);   /* src *= elt */
+   x86_add(cp->func, src, input_ptr);   /* src += input_ptr */
 }
 
 
@@ -134,9 +134,8 @@ static boolean load_input( struct aos_compilation *cp,
 
    /* Figure out source pointer address:
     */
-   get_src_ptr(cp->func
+   get_src_ptr(cp, 
                src, 
-               cp->machine_EDX, 
                linear ? cp->idx_EBX : x86_deref(cp->idx_EBX),
                idx);
 
index 7781782..24f619a 100644 (file)
@@ -68,8 +68,6 @@ struct draw_sse_vertex_shader {
    codegen_function func;
    
    struct tgsi_exec_machine *machine;
-
-   float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
 };
 
 
@@ -107,7 +105,7 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
                   machine->Outputs,
                   (float (*)[4])constants,
                   machine->Temps,
-                  shader->immediates,
+                  (float (*)[4])shader->base.immediates,
                    input,
                    base->info.num_inputs,
                    input_stride,
@@ -130,6 +128,8 @@ vs_sse_delete( struct draw_vertex_shader *base )
    
    x86_release_func( &shader->sse2_program );
 
+   align_free(shader->base.immediates);
+
    FREE( (void*) shader->base.state.tokens );
    FREE( shader );
 }
@@ -161,12 +161,18 @@ draw_create_vs_sse(struct draw_context *draw,
    vs->base.prepare = vs_sse_prepare;
    vs->base.run_linear = vs_sse_run_linear;
    vs->base.delete = vs_sse_delete;
+   
+   vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
+                                      sizeof(float), 16);
+
    vs->machine = &draw->vs.machine;
    
    x86_init_func( &vs->sse2_program );
 
    if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
-                       &vs->sse2_program, vs->immediates, TRUE )) 
+                       &vs->sse2_program, 
+                        (float (*)[4])vs->base.immediates, 
+                        TRUE )) 
       goto fail;
       
    vs->func = (codegen_function) x86_get_func( &vs->sse2_program );