i965_drv_video/render: rendering for Sandybridge
author Xiang, Haihao <haihao.xiang@intel.com>
Tue, 9 Nov 2010 05:08:47 +0000 (13:08 +0800)
committer Xiang, Haihao <haihao.xiang@intel.com>
Mon, 6 Dec 2010 04:50:04 +0000 (12:50 +0800)
Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
i965_drv_video/i965_defines.h
i965_drv_video/i965_drv_video.c
i965_drv_video/i965_render.c
i965_drv_video/i965_render.h
i965_drv_video/i965_structs.h

index d743688..c067bec 100644 (file)
 
 #define CMD_PIPELINED_POINTERS                  CMD(3, 0, 0)
 #define CMD_BINDING_TABLE_POINTERS              CMD(3, 0, 1)
+# define GEN6_BINDING_TABLE_MODIFY_PS           (1 << 12)/* for GEN6 */
+# define GEN6_BINDING_TABLE_MODIFY_GS           (1 << 9) /* for GEN6 */
+# define GEN6_BINDING_TABLE_MODIFY_VS           (1 << 8) /* for GEN6 */
+
 #define CMD_VERTEX_BUFFERS                      CMD(3, 0, 8)
 #define CMD_VERTEX_ELEMENTS                     CMD(3, 0, 9)
 #define CMD_DRAWING_RECTANGLE                   CMD(3, 1, 0)
 #define CMD_3DPRIMITIVE                         CMD(3, 3, 0)
 
 #define CMD_DEPTH_BUFFER                        CMD(3, 1, 5)
+# define CMD_DEPTH_BUFFER_TYPE_SHIFT            29
+# define CMD_DEPTH_BUFFER_FORMAT_SHIFT          18
+
+#define CMD_CLEAR_PARAMS                        CMD(3, 1, 0x10)
+/* DW1 */
+# define CMD_CLEAR_PARAMS_DEPTH_CLEAR_VALID     (1 << 15)
+
+/* for GEN6+ */
+#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS    CMD(3, 0, 0x02)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS  (1 << 12)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS  (1 << 9)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS  (1 << 8)
+
+#define GEN6_3DSTATE_URB                       CMD(3, 0, 0x05)
+/* DW1 */
+# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT                16
+# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT     0
+/* DW2 */
+# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT     8
+# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT                0
+
+#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS   CMD(3, 0, 0x0d)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC         (1 << 12)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF         (1 << 11)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP       (1 << 10)
+
+#define GEN6_3DSTATE_CC_STATE_POINTERS         CMD(3, 0, 0x0e)
+
+#define GEN6_3DSTATE_VS                                CMD(3, 0, 0x10)
+
+#define GEN6_3DSTATE_GS                                CMD(3, 0, 0x11)
+/* DW4 */
+# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT      0
+
+#define GEN6_3DSTATE_CLIP                      CMD(3, 0, 0x12)
+
+#define GEN6_3DSTATE_SF                                CMD(3, 0, 0x13)
+/* DW1 */
+# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT             22
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT   11
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT   4
+/* DW2 */
+/* DW3 */
+# define GEN6_3DSTATE_SF_CULL_BOTH                     (0 << 29)
+# define GEN6_3DSTATE_SF_CULL_NONE                     (1 << 29)
+# define GEN6_3DSTATE_SF_CULL_FRONT                    (2 << 29)
+# define GEN6_3DSTATE_SF_CULL_BACK                     (3 << 29)
+/* DW4 */
+# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT             29
+# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT            27
+# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT          25
+
+
+#define GEN6_3DSTATE_WM                                CMD(3, 0, 0x14)
+/* DW2
+ * NOTE(review): "SHITF" below is a misspelling of "SHIFT". The name is kept
+ * unchanged here because gen6_emit_wm_state() uses this exact spelling;
+ * a rename has to touch both places at once.
+ */
+# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF                   27
+# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT       18
+/* DW4 */
+# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT            16
+/* DW5 */
+# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT                     25
+# define GEN6_3DSTATE_WM_DISPATCH_ENABLE                       (1 << 19)
+# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE                    (1 << 1)
+# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE                     (1 << 0)
+/* DW6 */
+# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT                  20
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC     (1 << 15)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC   (1 << 14)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC      (1 << 13)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC                (1 << 12)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC      (1 << 11)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC         (1 << 10)
+
+
+#define GEN6_3DSTATE_CONSTANT_VS               CMD(3, 0, 0x15)
+#define GEN6_3DSTATE_CONSTANT_GS               CMD(3, 0, 0x16)
+#define GEN6_3DSTATE_CONSTANT_PS               CMD(3, 0, 0x17)
+
+#define GEN6_3DSTATE_SAMPLE_MASK               CMD(3, 0, 0x18)
+
+#define GEN6_3DSTATE_MULTISAMPLE               CMD(3, 1, 0x0d)
+/* DW1 */
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER         (0 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT     (1 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1                  (0 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4                  (2 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8                  (3 << 1)
 
 #define MFX(pipeline, op, sub_opa, sub_opb)     \
     (3 << 29 |                                  \
 #define I965_VFCOMPONENT_STORE_PID    7
 
 #define VE0_VERTEX_BUFFER_INDEX_SHIFT  27
+#define GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT      26 /* for GEN6 */
 #define VE0_VALID                      (1 << 26)
+#define GEN6_VE0_VALID                  (1 << 25) /* for GEN6 */
 #define VE0_FORMAT_SHIFT               16
 #define VE0_OFFSET_SHIFT               0
 #define VE1_VFCOMPONENT_0_SHIFT                28
 #define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT   0
 
 #define VB0_BUFFER_INDEX_SHIFT          27
+#define GEN6_VB0_BUFFER_INDEX_SHIFT     26
 #define VB0_VERTEXDATA                  (0 << 26)
 #define VB0_INSTANCEDATA                (1 << 26)
+#define GEN6_VB0_VERTEXDATA             (0 << 20)
+#define GEN6_VB0_INSTANCEDATA           (1 << 20)
 #define VB0_BUFFER_PITCH_SHIFT          0
 
 #define _3DPRIMITIVE_VERTEX_SEQUENTIAL  (0 << 15)
index 0a4a265..b617684 100644 (file)
@@ -1762,15 +1762,15 @@ i965_PutSurface(VADriverContextP ctx,
     if (flags & (VA_BOTTOM_FIELD | VA_TOP_FIELD))
         pp_flag |= I965_PP_FLAG_DEINTERLACING;
 
-    i965_render_put_surface(ctx, surface,
+    intel_render_put_surface(ctx, surface,
                             srcx, srcy, srcw, srch,
                             destx, desty, destw, desth,
                             pp_flag);
 
     if(obj_surface->subpic != VA_INVALID_ID) { 
-       i965_render_put_subpic(ctx, surface,
-                               srcx, srcy, srcw, srch,
-                               destx, desty, destw, desth);
+       intel_render_put_subpicture(ctx, surface,
+                                    srcx, srcy, srcw, srch,
+                                    destx, desty, destw, desth);
     } 
 
     dri_swap_buffer(ctx, dri_drawable);
index e09d19f..85d3f7d 100644 (file)
@@ -97,6 +97,25 @@ static const unsigned int ps_subpic_kernel_static_gen5[][4] =
 #include "shaders/render/exa_wm_write.g4b.gen5"
 };
 
+/* programs for Sandybridge
+ *
+ * The SF kernel table has no entries. render_kernels_gen6 records its
+ * sizeof as the kernel size, and the loading loop in i965_render_init()
+ * skips kernels whose size is 0, so no buffer is allocated for it.
+ * NOTE(review): an empty brace initializer is a compiler extension before
+ * C23 -- confirm the toolchains this must build with accept it.
+ */
+static const unsigned int sf_kernel_static_gen6[][4] = 
+{
+};
+/*
+ * Sandybridge pixel shader binary referenced by the "PS" entry of
+ * render_kernels_gen6. It is the concatenation, in this order, of the
+ * included pre-assembled .g6b fragments. Going by the file names these
+ * are: affine source coordinates, planar source sampling, YUV to RGB
+ * conversion and the final write -- TODO confirm against the shader
+ * sources.
+ */
+static const uint32_t ps_kernel_static_gen6[][4] = {
+#include "shaders/render/exa_wm_src_affine.g6b"
+#include "shaders/render/exa_wm_src_sample_planar.g6b"
+#include "shaders/render/exa_wm_yuv_rgb.g6b"
+#include "shaders/render/exa_wm_write.g6b"
+};
+/*
+ * Sandybridge pixel shader binary referenced by the "PS_SUBPIC" entry of
+ * render_kernels_gen6. It uses the same fragment sequence as
+ * ps_kernel_static_gen6 except that the sampling fragment is the "argb"
+ * variant instead of the "planar" one.
+ * NOTE(review): the YUV to RGB fragment is still included after ARGB
+ * sampling -- confirm this is intended for subpictures.
+ */
+static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
+#include "shaders/render/exa_wm_src_affine.g6b"
+#include "shaders/render/exa_wm_src_sample_argb.g6b"
+#include "shaders/render/exa_wm_yuv_rgb.g6b"
+#include "shaders/render/exa_wm_write.g6b"
+};
+
 #define SURFACE_STATE_PADDED_SIZE       ALIGN(sizeof(struct i965_surface_state), 32)
 #define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
 #define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
@@ -171,6 +190,28 @@ static struct render_kernel render_kernels_gen5[] = {
     }
 };
 
+static struct render_kernel render_kernels_gen6[] = { /* kernel table selected by i965_render_init() when IS_GEN6() is true */
+    {   /* SF slot: the program table is empty, so size is 0 and i965_render_init() skips it */
+        "SF",
+        sf_kernel_static_gen6,
+        sizeof(sf_kernel_static_gen6),
+        NULL
+    },
+    {   /* pixel shader for video surfaces; presumably the PS_KERNEL slot used by gen6_emit_wm_state() -- TODO confirm index */
+        "PS",
+        ps_kernel_static_gen6,
+        sizeof(ps_kernel_static_gen6),
+        NULL
+    },
+    /* pixel shader for subpictures */
+    {
+        "PS_SUBPIC",
+        ps_subpic_kernel_static_gen6,
+        sizeof(ps_subpic_kernel_static_gen6),
+        NULL /* last member starts out NULL in every entry; presumably the bo that i965_render_init() allocates */
+    }
+};
+
 static struct render_kernel *render_kernels = NULL;
 
 #define NUM_RENDER_KERNEL (sizeof(render_kernels_gen4)/sizeof(render_kernels_gen4[0]))
@@ -1359,7 +1400,7 @@ i965_render_initialize(VADriverContextP ctx)
     render_state->cc.viewport = bo;
 }
 
-void
+static void
 i965_render_put_surface(VADriverContextP ctx,
                         VASurfaceID surface,
                         short srcx,
@@ -1385,8 +1426,519 @@ i965_render_put_surface(VADriverContextP ctx,
     intel_batchbuffer_flush(ctx);
 }
 
+static void
+i965_render_put_subpicture(VADriverContextP ctx,
+                           VASurfaceID surface,
+                           short srcx,
+                           short srcy,
+                           unsigned short srcw,
+                           unsigned short srch,
+                           short destx,
+                           short desty,
+                           unsigned short destw,
+                           unsigned short desth)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface = SURFACE(surface);
+    struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
+    assert(obj_subpic); /* the subpicture id stored on the surface must resolve to an object */
+    /*
+     * Non-GEN6 subpicture path (intel_render_put_subpicture() dispatches
+     * here when IS_GEN6() is false): re-initialize the render state, build
+     * the subpicture state for the given source/destination rectangles,
+     * emit the subpicture pipeline, upload the image palette with 0xff as
+     * the last argument (presumably the alpha value -- TODO confirm), and
+     * flush the batch buffer.
+     */
+    i965_render_initialize(ctx);
+    i965_subpic_render_state_setup(ctx, surface,
+                                   srcx, srcy, srcw, srch,
+                                   destx, desty, destw, desth);
+    i965_subpic_render_pipeline_setup(ctx);
+    i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
+    intel_batchbuffer_flush(ctx);
+}
+
+/*
+ * for GEN6+
+ *
+ * gen6_render_initialize: called at the start of every
+ * gen6_render_put_surface(). For each state buffer held in
+ * i965->render_state it drops the previous reference and allocates a
+ * fresh buffer object: vertex buffer, surface state & binding table,
+ * sampler state, color calc state, CC viewport, blend state and
+ * depth/stencil state. Every allocation uses 4096 as the last
+ * dri_bo_alloc() argument (the alignment in libdrm's API). Allocation
+ * failure is only checked with assert().
+ */
+static void 
+gen6_render_initialize(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    dri_bo *bo;
+
+    /* VERTEX BUFFER */
+    dri_bo_unreference(render_state->vb.vertex_buffer);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "vertex buffer",
+                      4096,
+                      4096);
+    assert(bo);
+    render_state->vb.vertex_buffer = bo;
+
+    /* WM */
+    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "surface state & binding table",
+                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES, /* one padded surface state plus one unsigned int table entry per surface */
+                      4096);
+    assert(bo);
+    render_state->wm.surface_state_binding_table_bo = bo;
+
+    dri_bo_unreference(render_state->wm.sampler);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "sampler state",
+                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
+                      4096);
+    assert(bo);
+    render_state->wm.sampler = bo;
+    render_state->wm.sampler_count = 0; /* the new sampler buffer starts with no samplers recorded */
+
+    /* COLOR CALCULATOR */
+    dri_bo_unreference(render_state->cc.state);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "color calc state",
+                      sizeof(struct gen6_color_calc_state),
+                      4096);
+    assert(bo);
+    render_state->cc.state = bo;
+
+    /* CC VIEWPORT */
+    dri_bo_unreference(render_state->cc.viewport);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "cc viewport",
+                      sizeof(struct i965_cc_viewport),
+                      4096);
+    assert(bo);
+    render_state->cc.viewport = bo;
+
+    /* BLEND STATE */
+    dri_bo_unreference(render_state->cc.blend);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "blend state",
+                      sizeof(struct gen6_blend_state),
+                      4096);
+    assert(bo);
+    render_state->cc.blend = bo;
+
+    /* DEPTH & STENCIL STATE */
+    dri_bo_unreference(render_state->cc.depth_stencil);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "depth & stencil state",
+                      sizeof(struct gen6_depth_stencil_state),
+                      4096);
+    assert(bo);
+    render_state->cc.depth_stencil = bo;
+}
+/*
+ * Fill the color calc state buffer (render_state->cc.state): map it
+ * (second dri_bo_map() argument 1 requests a writable mapping in libdrm),
+ * zero the whole structure, then set the constant color to
+ * (r, g, b, a) = (1.0, 0.0, 1.0, 1.0) and unmap.
+ */
+static void
+gen6_render_color_calc_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    struct gen6_color_calc_state *color_calc_state;
+    
+    dri_bo_map(render_state->cc.state, 1);
+    assert(render_state->cc.state->virtual);
+    color_calc_state = render_state->cc.state->virtual;
+    memset(color_calc_state, 0, sizeof(*color_calc_state));
+    color_calc_state->constant_r = 1.0;
+    color_calc_state->constant_g = 0.0;
+    color_calc_state->constant_b = 1.0;
+    color_calc_state->constant_a = 1.0;
+    dri_bo_unmap(render_state->cc.state);
+}
+/*
+ * Fill the blend state buffer (render_state->cc.blend): map it, zero the
+ * structure (so blend_enable and every other field is 0), then enable the
+ * logic op with function 0xc.
+ * NOTE(review): 0xc is presumably the hardware "copy source" logic op --
+ * confirm against the PRM.
+ */
+static void
+gen6_render_blend_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    struct gen6_blend_state *blend_state;
+    
+    dri_bo_map(render_state->cc.blend, 1);
+    assert(render_state->cc.blend->virtual);
+    blend_state = render_state->cc.blend->virtual;
+    memset(blend_state, 0, sizeof(*blend_state));
+    blend_state->blend1.logic_op_enable = 1;
+    blend_state->blend1.logic_op_func = 0xc;
+    dri_bo_unmap(render_state->cc.blend);
+}
+/*
+ * Fill the depth/stencil state buffer (render_state->cc.depth_stencil)
+ * with zeros. Every field of struct gen6_depth_stencil_state, including
+ * stencil_enable, depth_test_enable and depth_write_enable, is left at 0.
+ */
+static void
+gen6_render_depth_stencil_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    struct gen6_depth_stencil_state *depth_stencil_state;
+    
+    dri_bo_map(render_state->cc.depth_stencil, 1);
+    assert(render_state->cc.depth_stencil->virtual);
+    depth_stencil_state = render_state->cc.depth_stencil->virtual;
+    memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
+    dri_bo_unmap(render_state->cc.depth_stencil);
+}
+/*
+ * Build all state buffers needed to draw one surface on GEN6. The
+ * surface state, sampler, CC viewport and vertex upload reuse the
+ * existing i965_render_* helpers; color calc, blend and depth/stencil
+ * state use the gen6_render_* helpers. The source and destination
+ * rectangles are only passed on to i965_render_upload_vertex().
+ * The 0 passed to i965_render_dest_surface_state() is presumably the
+ * surface/binding-table index -- TODO confirm.
+ */
+static void
+gen6_render_setup_states(VADriverContextP ctx,
+                         VASurfaceID surface,
+                         short srcx,
+                         short srcy,
+                         unsigned short srcw,
+                         unsigned short srch,
+                         short destx,
+                         short desty,
+                         unsigned short destw,
+                         unsigned short desth)
+{
+    i965_render_dest_surface_state(ctx, 0);
+    i965_render_src_surfaces_state(ctx, surface);
+    i965_render_sampler(ctx);
+    i965_render_cc_viewport(ctx);
+    gen6_render_color_calc_state(ctx);
+    gen6_render_blend_state(ctx);
+    gen6_render_depth_stencil_state(ctx);
+    i965_render_upload_vertex(ctx, surface,
+                              srcx, srcy, srcw, srch,
+                              destx, desty, destw, desth);
+}
+/*
+ * Emit the fixed commands that start the GEN6 render batch: select the
+ * 3D pipeline, program multisampling (pixel location "center", one
+ * sample per pixel), set the sample mask to 1 and set the system
+ * instruction pointer to 0.
+ * NOTE(review): "invarient" in the function name is a misspelling of
+ * "invariant"; it is kept because gen6_render_emit_states() calls this
+ * name.
+ */
+static void
+gen6_emit_invarient_states(VADriverContextP ctx)
+{
+    OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+    OUT_BATCH(ctx, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
+    OUT_BATCH(ctx, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
+              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
+    OUT_BATCH(ctx, 0);
+
+    OUT_BATCH(ctx, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
+    OUT_BATCH(ctx, 1);
+
+    /* Set system instruction pointer */
+    OUT_BATCH(ctx, CMD_STATE_SIP | 0);
+    OUT_BATCH(ctx, 0);
+}
+/*
+ * Emit the 10-dword STATE_BASE_ADDRESS command. Only the surface state
+ * base is relocated to a buffer (surface_state_binding_table_bo); every
+ * other base address and upper bound is written as the bare
+ * BASE_ADDRESS_MODIFY flag, i.e. with a zero address value.
+ */
+static void
+gen6_emit_state_base_address(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+
+    OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | (10 - 2));
+    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* General state base address */
+    OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
+    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
+    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Indirect object base address */
+    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Instruction base address */
+    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* General state upper bound */
+    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
+    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
+    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
+}
+/*
+ * Emit the 4-dword 3DSTATE_VIEWPORT_STATE_POINTERS command with only the
+ * CC modify bit set. The first two pointer dwords are written as 0
+ * (presumably the CLIP and SF viewport pointers -- TODO confirm order);
+ * the last one is relocated to render_state->cc.viewport.
+ */
+static void
+gen6_emit_viewport_state_pointers(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+
+    OUT_BATCH(ctx, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
+              GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
+              (4 - 2));
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_RELOC(ctx, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+/*
+ * Emit the 3-dword 3DSTATE_URB command: the VS entry size field is
+ * written as (1 - 1) with 24 VS entries, and the GS size and entry count
+ * are both 0.
+ */
+static void
+gen6_emit_urb(VADriverContextP ctx)
+{
+    OUT_BATCH(ctx, GEN6_3DSTATE_URB | (3 - 2));
+    OUT_BATCH(ctx, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
+              (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
+    OUT_BATCH(ctx, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
+              (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
+}
+/*
+ * Emit the 4-dword 3DSTATE_CC_STATE_POINTERS command, relocating to the
+ * blend, depth/stencil and color calc state buffers in that order.
+ * Each relocation uses a delta of 1, which sets bit 0 of the pointer
+ * dword -- presumably the per-pointer "modify enable" bit; TODO confirm
+ * against the PRM.
+ */
+static void
+gen6_emit_cc_state_pointers(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+
+    OUT_BATCH(ctx, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
+    OUT_RELOC(ctx, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+    OUT_RELOC(ctx, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+    OUT_RELOC(ctx, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+}
+/*
+ * Emit the 4-dword 3DSTATE_SAMPLER_STATE_POINTERS command with only the
+ * PS modify bit set: the VS and GS pointers are 0 and the PS pointer is
+ * relocated to render_state->wm.sampler.
+ */
+static void
+gen6_emit_sampler_state_pointers(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+
+    OUT_BATCH(ctx, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
+              GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
+              (4 - 2));
+    OUT_BATCH(ctx, 0); /* VS */
+    OUT_BATCH(ctx, 0); /* GS */
+    OUT_RELOC(ctx,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+/*
+ * Emit the 4-dword binding table pointers command with only the PS
+ * modify bit set. The PS entry is the plain value BINDING_TABLE_OFFSET
+ * (no relocation); presumably it is interpreted relative to the surface
+ * state base set in gen6_emit_state_base_address() -- TODO confirm.
+ */
+static void
+gen6_emit_binding_table(VADriverContextP ctx)
+{
+    /* Binding table pointers */
+    OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS |
+              GEN6_BINDING_TABLE_MODIFY_PS |
+              (4 - 2));
+    OUT_BATCH(ctx, 0);         /* vs */
+    OUT_BATCH(ctx, 0);         /* gs */
+    /* Only the PS uses the binding table */
+    OUT_BATCH(ctx, BINDING_TABLE_OFFSET);
+}
+/*
+ * Emit a 7-dword depth buffer command describing a NULL surface (type
+ * I965_SURFACE_NULL, format D32_FLOAT, all remaining dwords 0), followed
+ * by a 2-dword CLEAR_PARAMS command whose payload is 0, so
+ * CMD_CLEAR_PARAMS_DEPTH_CLEAR_VALID is not set.
+ */
+static void
+gen6_emit_depth_buffer_state(VADriverContextP ctx)
+{
+    OUT_BATCH(ctx, CMD_DEPTH_BUFFER | (7 - 2));
+    OUT_BATCH(ctx, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
+              (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+
+    OUT_BATCH(ctx, CMD_CLEAR_PARAMS | (2 - 2));
+    OUT_BATCH(ctx, 0);
+}
+/*
+ * Emit the drawing rectangle. GEN6 adds nothing here; the work is
+ * delegated unchanged to the existing i965_render_drawing_rectangle().
+ */
+static void
+gen6_emit_drawing_rectangle(VADriverContextP ctx)
+{
+    i965_render_drawing_rectangle(ctx);
+}
+/*
+ * Emit the vertex shader stage setup: a 5-dword 3DSTATE_CONSTANT_VS and
+ * a 6-dword 3DSTATE_VS, both with all-zero payloads (no constant buffer,
+ * no VS kernel).
+ */
+static void 
+gen6_emit_vs_state(VADriverContextP ctx)
+{
+    /* disable VS constant buffer */
+    OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+       
+    OUT_BATCH(ctx, GEN6_3DSTATE_VS | (6 - 2));
+    OUT_BATCH(ctx, 0); /* without VS kernel */
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0); /* pass-through */
+}
+/*
+ * Emit the geometry shader stage setup: a 5-dword 3DSTATE_CONSTANT_GS
+ * and a 7-dword 3DSTATE_GS, both with all-zero payloads (no constant
+ * buffer, no GS kernel).
+ */
+static void 
+gen6_emit_gs_state(VADriverContextP ctx)
+{
+    /* disable GS constant buffer */
+    OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+       
+    OUT_BATCH(ctx, GEN6_3DSTATE_GS | (7 - 2));
+    OUT_BATCH(ctx, 0); /* without GS kernel */
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0); /* pass-through */
+}
+/*
+ * Emit a 4-dword 3DSTATE_CLIP command with an all-zero payload, which
+ * the inline comment describes as pass-through.
+ */
+static void 
+gen6_emit_clip_state(VADriverContextP ctx)
+{
+    OUT_BATCH(ctx, GEN6_3DSTATE_CLIP | (4 - 2));
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0); /* pass-through */
+    OUT_BATCH(ctx, 0);
+}
+/*
+ * Emit the 20-dword 3DSTATE_SF command. DW1 requests one output, a URB
+ * entry read length of 1 and a read offset of 0; DW3 selects "cull none";
+ * DW4 sets the tri-fan provoking vertex field to 2. All other dwords are
+ * 0.
+ */
+static void 
+gen6_emit_sf_state(VADriverContextP ctx)
+{
+    OUT_BATCH(ctx, GEN6_3DSTATE_SF | (20 - 2));
+    OUT_BATCH(ctx, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
+              (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
+              (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, GEN6_3DSTATE_SF_CULL_NONE);
+    OUT_BATCH(ctx, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0); /* DW9 */
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0); /* DW14 */
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0); /* DW19 */
+}
+/*
+ * Emit the pixel shader stage setup: a zeroed 5-dword
+ * 3DSTATE_CONSTANT_PS followed by a 9-dword 3DSTATE_WM that points at
+ * render_kernels[PS_KERNEL].bo. The WM command programs a sampler count
+ * field of 1, 5 binding table entries, dispatch start GRF 6, a max
+ * threads field of (40 - 1), dispatch enable with the 16-wide dispatch
+ * bit, one SF output and perspective pixel barycentric interpolation.
+ * NOTE(review): the kernel is always PS_KERNEL here; nothing in this
+ * function selects the subpicture kernel.
+ */
+static void 
+gen6_emit_wm_state(VADriverContextP ctx)
+{
+    /* disable WM constant buffer */
+    OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+
+    OUT_BATCH(ctx, GEN6_3DSTATE_WM | (9 - 2));
+    OUT_RELOC(ctx, render_kernels[PS_KERNEL].bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0,
+              0);
+    OUT_BATCH(ctx, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
+              (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
+    OUT_BATCH(ctx, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
+              GEN6_3DSTATE_WM_DISPATCH_ENABLE |
+              GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
+    OUT_BATCH(ctx, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
+              GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+}
+/*
+ * Emit the 5-dword vertex elements command describing two elements, both
+ * read from vertex buffer index 0 in R32G32_FLOAT format: the position at
+ * byte offset 0 and the texture coordinate at byte offset 8. For each
+ * element components 0 and 1 come from the source data and components 2
+ * and 3 are stored as 1.0. The GEN6-specific buffer index shift and
+ * valid bit are used for dword 0 of each element.
+ */
+static void
+gen6_emit_vertex_element_state(VADriverContextP ctx)
+{
+    /* Set up our vertex elements, sourced from the single vertex buffer. */
+    OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | (5 - 2));
+    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
+    OUT_BATCH(ctx, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+              GEN6_VE0_VALID |
+              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+              (0 << VE0_OFFSET_SHIFT));
+    OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
+    OUT_BATCH(ctx, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+              GEN6_VE0_VALID |
+              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+              (8 << VE0_OFFSET_SHIFT));
+    OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
+              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+}
+/*
+ * Emit the vertex buffer binding and the draw call (11 dwords reserved
+ * with BEGIN_BATCH). The vertex buffer is bound at index 0 as per-vertex
+ * data with a pitch of 4 * 4 = 16 bytes; the second relocation points
+ * 12 * 4 = 48 bytes into the same buffer, i.e. 3 vertices times the
+ * pitch (presumably the end address -- TODO confirm). The 3DPRIMITIVE
+ * then draws one instance of a 3-vertex RECTLIST with sequential vertex
+ * access.
+ */
+static void
+gen6_emit_vertices(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+
+    BEGIN_BATCH(ctx, 11);
+    OUT_BATCH(ctx, CMD_VERTEX_BUFFERS | 3);
+    OUT_BATCH(ctx, 
+              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
+              GEN6_VB0_VERTEXDATA |
+              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
+    OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
+    OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
+    OUT_BATCH(ctx, 0);
+
+    OUT_BATCH(ctx, 
+              CMD_3DPRIMITIVE |
+              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
+              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
+              (0 << 9) |
+              4);
+    OUT_BATCH(ctx, 3); /* vertex count per instance */
+    OUT_BATCH(ctx, 0); /* start vertex offset */
+    OUT_BATCH(ctx, 1); /* single instance */
+    OUT_BATCH(ctx, 0); /* start instance location */
+    OUT_BATCH(ctx, 0); /* index buffer offset, ignored */
+    ADVANCE_BATCH(ctx);
+}
+/*
+ * Emit the complete GEN6 command sequence for one draw inside a single
+ * atomic batch section (started with a size argument of 0x1000): an MI
+ * flush, the fixed state, base addresses, state pointers, the VS, GS,
+ * clip, SF and WM stage setup, binding table, depth buffer, drawing
+ * rectangle, vertex elements and finally the vertices/primitive.
+ * The batch is not flushed here; callers do that.
+ */
+static void
+gen6_render_emit_states(VADriverContextP ctx)
+{
+    intel_batchbuffer_start_atomic(ctx, 0x1000);
+    intel_batchbuffer_emit_mi_flush(ctx);
+    gen6_emit_invarient_states(ctx);
+    gen6_emit_state_base_address(ctx);
+    gen6_emit_viewport_state_pointers(ctx);
+    gen6_emit_urb(ctx);
+    gen6_emit_cc_state_pointers(ctx);
+    gen6_emit_sampler_state_pointers(ctx);
+    gen6_emit_vs_state(ctx);
+    gen6_emit_gs_state(ctx);
+    gen6_emit_clip_state(ctx);
+    gen6_emit_sf_state(ctx);
+    gen6_emit_wm_state(ctx);
+    gen6_emit_binding_table(ctx);
+    gen6_emit_depth_buffer_state(ctx);
+    gen6_emit_drawing_rectangle(ctx);
+    gen6_emit_vertex_element_state(ctx);
+    gen6_emit_vertices(ctx);
+    intel_batchbuffer_end_atomic(ctx);
+}
+/*
+ * GEN6 implementation of surface rendering, reached from
+ * intel_render_put_surface() when IS_GEN6() is true. It re-allocates the
+ * state buffers, fills them for the given surface and source/destination
+ * rectangles, emits the command sequence and flushes the batch buffer.
+ * The flag argument is accepted but not used by this function.
+ */
+static void
+gen6_render_put_surface(VADriverContextP ctx,
+                        VASurfaceID surface,
+                        short srcx,
+                        short srcy,
+                        unsigned short srcw,
+                        unsigned short srch,
+                        short destx,
+                        short desty,
+                        unsigned short destw,
+                        unsigned short desth,
+                        unsigned int flag)
+{
+    gen6_render_initialize(ctx);
+    gen6_render_setup_states(ctx, surface,
+                             srcx, srcy, srcw, srch,
+                             destx, desty, destw, desth);
+    gen6_render_emit_states(ctx);
+    intel_batchbuffer_flush(ctx);
+}
+/*
+ * GEN6 subpicture rendering: not implemented. The body is empty, so when
+ * intel_render_put_subpicture() dispatches here on a GEN6 device the call
+ * returns without drawing anything and without reporting an error.
+ */
+static void
+gen6_render_put_subpicture(VADriverContextP ctx,
+                           VASurfaceID surface,
+                           short srcx,
+                           short srcy,
+                           unsigned short srcw,
+                           unsigned short srch,
+                           short destx,
+                           short desty,
+                           unsigned short destw,
+                           unsigned short desth)
+{
+
+}
+
+/*
+ * global functions
+ */
 void
-i965_render_put_subpic(VADriverContextP ctx,
+intel_render_put_surface(VADriverContextP ctx,
                         VASurfaceID surface,
                         short srcx,
                         short srcy,
@@ -1395,22 +1947,46 @@ i965_render_put_subpic(VADriverContextP ctx,
                         short destx,
                         short desty,
                         unsigned short destw,
-                        unsigned short desth)
+                        unsigned short desth,
+                        unsigned int flag)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct object_surface *obj_surface = SURFACE(surface);
-    struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
-    assert(obj_subpic);
 
-    i965_render_initialize(ctx);
-    i965_subpic_render_state_setup(ctx, surface,
-           srcx, srcy, srcw, srch,
-           destx, desty, destw, desth);
-    i965_subpic_render_pipeline_setup(ctx);
-    i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
-    intel_batchbuffer_flush(ctx);
+    if (IS_GEN6(i965->intel.device_id))
+        gen6_render_put_surface(ctx, surface,
+                                srcx, srcy, srcw, srch,
+                                destx, desty, destw, desth,
+                                flag);
+    else
+        i965_render_put_surface(ctx, surface,
+                                srcx, srcy, srcw, srch,
+                                destx, desty, destw, desth,
+                                flag);
 }
 
+void
+intel_render_put_subpicture(VADriverContextP ctx,
+                           VASurfaceID surface,
+                           short srcx,
+                           short srcy,
+                           unsigned short srcw,
+                           unsigned short srch,
+                           short destx,
+                           short desty,
+                           unsigned short destw,
+                           unsigned short desth)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    /*
+     * Public entry point for drawing a surface's subpicture: forward the
+     * unchanged arguments to the GEN6 implementation when the device id
+     * is GEN6, otherwise to the i965 implementation.
+     */
+    if (IS_GEN6(i965->intel.device_id))
+        gen6_render_put_subpicture(ctx, surface,
+                                   srcx, srcy, srcw, srch,
+                                   destx, desty, destw, desth);
+    else
+        i965_render_put_subpicture(ctx, surface,
+                                   srcx, srcy, srcw, srch,
+                                   destx, desty, destw, desth);
+}
 
 Bool 
 i965_render_init(VADriverContextP ctx)
@@ -1422,14 +1998,22 @@ i965_render_init(VADriverContextP ctx)
     /* kernel */
     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
                                  sizeof(render_kernels_gen5[0])));
+    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
+                                 sizeof(render_kernels_gen6[0])));
 
-    if (IS_IRONLAKE(i965->intel.device_id))
+    if (IS_GEN6(i965->intel.device_id))
+        render_kernels = render_kernels_gen6;
+    else if (IS_IRONLAKE(i965->intel.device_id))
         render_kernels = render_kernels_gen5;
     else
         render_kernels = render_kernels_gen4;
 
     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
         struct render_kernel *kernel = &render_kernels[i];
+
+        if (!kernel->size)
+            continue;
+
         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
                                   kernel->name, 
                                   kernel->size, 0x1000);
@@ -1483,6 +2067,10 @@ i965_render_terminate(VADriverContextP ctx)
     render_state->cc.viewport = NULL;
     dri_bo_unreference(render_state->cc.state);
     render_state->cc.state = NULL;
+    dri_bo_unreference(render_state->cc.blend);
+    render_state->cc.blend = NULL;
+    dri_bo_unreference(render_state->cc.depth_stencil);
+    render_state->cc.depth_stencil = NULL;
 
     if (render_state->draw_region) {
         dri_bo_unreference(render_state->draw_region->bo);
index d341d04..8ff4fe2 100644 (file)
@@ -57,6 +57,8 @@ struct i965_render_state
     struct {
         dri_bo *state;
         dri_bo *viewport;
+        dri_bo *blend;
+        dri_bo *depth_stencil;
     } cc;
 
     struct {
@@ -73,7 +75,7 @@ struct i965_render_state
 
 Bool i965_render_init(VADriverContextP ctx);
 Bool i965_render_terminate(VADriverContextP ctx);
-void i965_render_put_surface(VADriverContextP ctx,
+void intel_render_put_surface(VADriverContextP ctx,
                              VASurfaceID surface,
                              short srcx,
                              short srcy,
@@ -87,7 +89,7 @@ void i965_render_put_surface(VADriverContextP ctx,
 
 
 void
-i965_render_put_subpic(VADriverContextP ctx,
+intel_render_put_subpicture(VADriverContextP ctx,
                         VASurfaceID surface,
                         short srcx,
                         short srcy,
index f8be616..5f85269 100644 (file)
@@ -964,4 +964,105 @@ struct i965_sampler_dndi
     } dw7;
 };
 
+/*
+ * Layout of the GEN6 blend state written by gen6_render_blend_state().
+ * It consists of two bit-field groups, blend0 and blend1, whose field
+ * widths each add up to 32 bits. The padN members are unnamed-purpose
+ * filler bits.
+ * NOTE(review): the fields presumably run from bit 0 upward, which
+ * relies on the compiler allocating bit-fields least-significant-bit
+ * first -- confirm for the supported ABIs.
+ */
+struct gen6_blend_state
+{
+    struct {
+        unsigned int dest_blend_factor:5;
+        unsigned int source_blend_factor:5;
+        unsigned int pad3:1;
+        unsigned int blend_func:3;
+        unsigned int pad2:1;
+        unsigned int ia_dest_blend_factor:5;
+        unsigned int ia_source_blend_factor:5;
+        unsigned int pad1:1;
+        unsigned int ia_blend_func:3;
+        unsigned int pad0:1;
+        unsigned int ia_blend_enable:1;
+        unsigned int blend_enable:1;
+    } blend0;
+
+    struct {
+        unsigned int post_blend_clamp_enable:1;
+        unsigned int pre_blend_clamp_enable:1;
+        unsigned int clamp_range:2;
+        unsigned int pad0:4;
+        unsigned int x_dither_offset:2;
+        unsigned int y_dither_offset:2;
+        unsigned int dither_enable:1;
+        unsigned int alpha_test_func:3;
+        unsigned int alpha_test_enable:1;
+        unsigned int pad1:1;
+        unsigned int logic_op_func:4;
+        unsigned int logic_op_enable:1;
+        unsigned int pad2:1;
+        unsigned int write_disable_b:1;
+        unsigned int write_disable_g:1;
+        unsigned int write_disable_r:1;
+        unsigned int write_disable_a:1;
+        unsigned int pad3:1;
+        unsigned int alpha_to_coverage_dither:1;
+        unsigned int alpha_to_one:1;
+        unsigned int alpha_to_coverage:1;
+    } blend1;
+};
+/*
+ * Layout of the GEN6 color calc state written by
+ * gen6_render_color_calc_state(): one 32-bit bit-field group (cc0), an
+ * alpha reference value that can be accessed either as a float or as an
+ * 8-bit unsigned integer (cc1), and four float constant color
+ * components.
+ */
+struct gen6_color_calc_state
+{
+    struct {
+        unsigned int alpha_test_format:1;
+        unsigned int pad0:14;
+        unsigned int round_disable:1;
+        unsigned int bf_stencil_ref:8;
+        unsigned int stencil_ref:8;
+    } cc0;
+
+    union {
+        float alpha_ref_f;
+        struct {
+            unsigned int ui:8;
+            unsigned int pad0:24;
+        } alpha_ref_fi;
+    } cc1;
+
+    float constant_r;
+    float constant_g;
+    float constant_b;
+    float constant_a;
+};
+/*
+ * Layout of the GEN6 depth/stencil state written by
+ * gen6_render_depth_stencil_state(). It has three bit-field groups, each
+ * 32 bits wide: ds0 holds the front and back-face ("bf_") stencil
+ * operations, functions and enables, ds1 the four 8-bit stencil masks,
+ * and ds2 the depth test/write controls.
+ */
+struct gen6_depth_stencil_state
+{
+    struct {
+        unsigned int pad0:3;
+        unsigned int bf_stencil_pass_depth_pass_op:3;
+        unsigned int bf_stencil_pass_depth_fail_op:3;
+        unsigned int bf_stencil_fail_op:3;
+        unsigned int bf_stencil_func:3;
+        unsigned int bf_stencil_enable:1;
+        unsigned int pad1:2;
+        unsigned int stencil_write_enable:1;
+        unsigned int stencil_pass_depth_pass_op:3;
+        unsigned int stencil_pass_depth_fail_op:3;
+        unsigned int stencil_fail_op:3;
+        unsigned int stencil_func:3;
+        unsigned int stencil_enable:1;
+    } ds0;
+
+    struct {
+        unsigned int bf_stencil_write_mask:8;
+        unsigned int bf_stencil_test_mask:8;
+        unsigned int stencil_write_mask:8;
+        unsigned int stencil_test_mask:8;
+    } ds1;
+
+    struct {
+        unsigned int pad0:26;
+        unsigned int depth_write_enable:1;
+        unsigned int depth_test_func:3;
+        unsigned int pad1:1;
+        unsigned int depth_test_enable:1;
+    } ds2;
+};
+
 #endif /* _I965_STRUCTS_H_ */