i965_drv_video: rendering for Ivybridge
author Xiang, Haihao <haihao.xiang@intel.com>
Wed, 25 May 2011 04:55:46 +0000 (12:55 +0800)
committer Xiang, Haihao <haihao.xiang@intel.com>
Wed, 25 May 2011 05:01:05 +0000 (13:01 +0800)
Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
i965_drv_video/i965_defines.h
i965_drv_video/i965_render.c
i965_drv_video/i965_structs.h

index c689011..66b0b5d 100644 (file)
 #define GEN6_3DSTATE_CLIP                      CMD(3, 0, 0x12)
 
 #define GEN6_3DSTATE_SF                                CMD(3, 0, 0x13)
-/* DW1 */
-# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT             22
-# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT   11
-# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT   4
+/* DW1 on GEN6 */
+# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT              22
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT    11
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT    4
+/* DW1 on GEN7 */
+# define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT      12
+
+
 /* DW2 */
 /* DW3 */
 # define GEN6_3DSTATE_SF_CULL_BOTH                     (0 << 29)
 # define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC      (1 << 11)
 # define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC         (1 << 10)
 
+/* 3DSTATE_WM on GEN7 */
+/* DW1 */
+# define GEN7_WM_STATISTICS_ENABLE                              (1 << 31)
+# define GEN7_WM_DEPTH_CLEAR                                    (1 << 30)
+# define GEN7_WM_DISPATCH_ENABLE                                (1 << 29)
+# define GEN6_WM_DEPTH_RESOLVE                                  (1 << 28)
+# define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE                     (1 << 27)
+# define GEN7_WM_KILL_ENABLE                                    (1 << 25)
+# define GEN7_WM_PSCDEPTH_OFF                                   (0 << 23)
+# define GEN7_WM_PSCDEPTH_ON                                    (1 << 23)
+# define GEN7_WM_PSCDEPTH_ON_GE                                 (2 << 23)
+# define GEN7_WM_PSCDEPTH_ON_LE                                 (3 << 23)
+# define GEN7_WM_USES_SOURCE_DEPTH                              (1 << 20)
+# define GEN7_WM_USES_SOURCE_W                                  (1 << 19)
+# define GEN7_WM_POSITION_ZW_PIXEL                              (0 << 17)
+# define GEN7_WM_POSITION_ZW_CENTROID                           (2 << 17)
+# define GEN7_WM_POSITION_ZW_SAMPLE                             (3 << 17)
+# define GEN7_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC              (1 << 16)
+# define GEN7_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC            (1 << 15)
+# define GEN7_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC               (1 << 14)
+# define GEN7_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC                 (1 << 13)
+# define GEN7_WM_PERSPECTIVE_CENTROID_BARYCENTRIC               (1 << 12)
+# define GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC                  (1 << 11)
+# define GEN7_WM_USES_INPUT_COVERAGE_MASK                       (1 << 10)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5                      (0 << 8)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0                      (1 << 8)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_2_0                      (2 << 8)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_4_0                      (3 << 8)
+# define GEN7_WM_LINE_AA_WIDTH_0_5                              (0 << 6)
+# define GEN7_WM_LINE_AA_WIDTH_1_0                              (1 << 6)
+# define GEN7_WM_LINE_AA_WIDTH_2_0                              (2 << 6)
+# define GEN7_WM_LINE_AA_WIDTH_4_0                              (3 << 6)
+# define GEN7_WM_POLYGON_STIPPLE_ENABLE                         (1 << 4)
+# define GEN7_WM_LINE_STIPPLE_ENABLE                            (1 << 3)
+# define GEN7_WM_POINT_RASTRULE_UPPER_RIGHT                     (1 << 2)
+# define GEN7_WM_MSRAST_OFF_PIXEL                               (0 << 0)
+# define GEN7_WM_MSRAST_OFF_PATTERN                             (1 << 0)
+# define GEN7_WM_MSRAST_ON_PIXEL                                (2 << 0)
+# define GEN7_WM_MSRAST_ON_PATTERN                              (3 << 0)
+/* DW2 */
+# define GEN7_WM_MSDISPMODE_PERPIXEL                            (1 << 31)
 
 #define GEN6_3DSTATE_CONSTANT_VS               CMD(3, 0, 0x15)
 #define GEN6_3DSTATE_CONSTANT_GS               CMD(3, 0, 0x16)
 # define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4                  (2 << 1)
 # define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8                  (3 << 1)
 
+/* GEN7 */
+#define GEN7_3DSTATE_CLEAR_PARAMS               CMD(3, 0, 0x04)
+#define GEN7_3DSTATE_DEPTH_BUFFER               CMD(3, 0, 0x05)
+
+#define GEN7_3DSTATE_URB_VS                     CMD(3, 0, 0x30)
+#define GEN7_3DSTATE_URB_HS                     CMD(3, 0, 0x31)
+#define GEN7_3DSTATE_URB_DS                     CMD(3, 0, 0x32)
+#define GEN7_3DSTATE_URB_GS                     CMD(3, 0, 0x33)
+/* DW1 */
+# define GEN7_URB_ENTRY_NUMBER_SHIFT            0
+# define GEN7_URB_ENTRY_SIZE_SHIFT              16
+# define GEN7_URB_STARTING_ADDRESS_SHIFT        25
+
+#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS     CMD(3, 1, 0x12)
+#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS     CMD(3, 1, 0x16)
+/* DW1 */
+# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
+
+#define GEN7_3DSTATE_CONSTANT_HS                CMD(3, 0, 0x19)
+#define GEN7_3DSTATE_CONSTANT_DS                CMD(3, 0, 0x1a)
+
+#define GEN7_3DSTATE_HS                         CMD(3, 0, 0x1b)
+#define GEN7_3DSTATE_TE                         CMD(3, 0, 0x1c)
+#define GEN7_3DSTATE_DS                         CMD(3, 0, 0x1d)
+#define GEN7_3DSTATE_STREAMOUT                  CMD(3, 0, 0x1e)
+#define GEN7_3DSTATE_SBE                        CMD(3, 0, 0x1f)
+
+/* DW1 */
+# define GEN7_SBE_SWIZZLE_CONTROL_MODE          (1 << 28)
+# define GEN7_SBE_NUM_OUTPUTS_SHIFT             22
+# define GEN7_SBE_SWIZZLE_ENABLE                (1 << 21)
+# define GEN7_SBE_POINT_SPRITE_LOWERLEFT        (1 << 20)
+# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT   11
+# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT   4
+
+#define GEN7_3DSTATE_PS                                 CMD(3, 0, 0x20)
+/* DW1: kernel pointer */
+/* DW2 */
+# define GEN7_PS_SPF_MODE                               (1 << 31)
+# define GEN7_PS_VECTOR_MASK_ENABLE                     (1 << 30)
+# define GEN7_PS_SAMPLER_COUNT_SHIFT                    27
+# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT        18
+# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754           (0 << 16)
+# define GEN7_PS_FLOATING_POINT_MODE_ALT                (1 << 16)
+/* DW3: scratch space */
+/* DW4 */
+# define GEN7_PS_MAX_THREADS_SHIFT                      23
+# define GEN7_PS_PUSH_CONSTANT_ENABLE                   (1 << 11)
+# define GEN7_PS_ATTRIBUTE_ENABLE                       (1 << 10)
+# define GEN7_PS_OMASK_TO_RENDER_TARGET                 (1 << 9)
+# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE               (1 << 7)
+# define GEN7_PS_POSOFFSET_NONE                         (0 << 3)
+# define GEN7_PS_POSOFFSET_CENTROID                     (2 << 3)
+# define GEN7_PS_POSOFFSET_SAMPLE                       (3 << 3)
+# define GEN7_PS_32_DISPATCH_ENABLE                     (1 << 2)
+# define GEN7_PS_16_DISPATCH_ENABLE                     (1 << 1)
+# define GEN7_PS_8_DISPATCH_ENABLE                      (1 << 0)
+/* DW5 */
+# define GEN7_PS_DISPATCH_START_GRF_SHIFT_0             16
+# define GEN7_PS_DISPATCH_START_GRF_SHIFT_1             8
+# define GEN7_PS_DISPATCH_START_GRF_SHIFT_2             0
+/* DW6: kernel 1 pointer */
+/* DW7: kernel 2 pointer */
+
+#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL      CMD(3, 0, 0x21)
+#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC         CMD(3, 0, 0x23)
+
+#define GEN7_3DSTATE_BLEND_STATE_POINTERS               CMD(3, 0, 0x24)
+#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS       CMD(3, 0, 0x25)
+
+#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS          CMD(3, 0, 0x26)
+#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS          CMD(3, 0, 0x27)
+#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS          CMD(3, 0, 0x28)
+#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS          CMD(3, 0, 0x29)
+#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS          CMD(3, 0, 0x2a)
+
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS          CMD(3, 0, 0x2b)
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS          CMD(3, 0, 0x2e)
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS          CMD(3, 0, 0x2f)
+
 #define MFX(pipeline, op, sub_opa, sub_opb)     \
     (3 << 29 |                                  \
      (pipeline) << 27 |                         \
 #define VB0_INSTANCEDATA                (1 << 26)
 #define GEN6_VB0_VERTEXDATA             (0 << 20)
 #define GEN6_VB0_INSTANCEDATA           (1 << 20)
+#define GEN7_VB0_ADDRESS_MODIFYENABLE   (1 << 14)
 #define VB0_BUFFER_PITCH_SHIFT          0
 
 #define _3DPRIMITIVE_VERTEX_SEQUENTIAL  (0 << 15)
 #define _3DPRIMITIVE_VERTEX_RANDOM      (1 << 15)
 #define _3DPRIMITIVE_TOPOLOGY_SHIFT     10
+/* DW1 on GEN7 */
+# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL     (0 << 8)
+# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM         (1 << 8)
 
 #define _3DPRIM_POINTLIST         0x01
 #define _3DPRIM_LINELIST          0x02
index 5d2c1bf..d6ae7f3 100644 (file)
@@ -116,7 +116,27 @@ static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
 #include "shaders/render/exa_wm_write.g6b"
 };
 
-#define SURFACE_STATE_PADDED_SIZE       ALIGN(sizeof(struct i965_surface_state), 32)
+/* programs for Ivybridge */
+static const uint32_t sf_kernel_static_gen7[][4] = 
+{
+};
+
+static const uint32_t ps_kernel_static_gen7[][4] = {
+#include "shaders/render/exa_wm_src_affine.g7b"
+#include "shaders/render/exa_wm_src_sample_planar.g7b"
+#include "shaders/render/exa_wm_yuv_rgb.g7b"
+#include "shaders/render/exa_wm_write.g7b"
+};
+
+static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
+#include "shaders/render/exa_wm_src_affine.g7b"
+#include "shaders/render/exa_wm_src_sample_argb.g7b"
+#include "shaders/render/exa_wm_write.g7b"
+};
+
+#define SURFACE_STATE_PADDED_SIZE_I965  ALIGN(sizeof(struct i965_surface_state), 32)
+#define SURFACE_STATE_PADDED_SIZE_GEN7  ALIGN(sizeof(struct gen7_surface_state), 32)
+#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
 #define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
 #define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
 
@@ -213,6 +233,31 @@ static struct i965_kernel render_kernels_gen6[] = {
     }
 };
 
+/*
+ * Render kernel table for GEN7 (Ivybridge).
+ *
+ * One entry per kernel (SF, PS, PS_SUBPIC); each pairs a name and kernel id
+ * with the matching *_static_gen7 binary array and its size in bytes.
+ * sf_kernel_static_gen7 is declared with an empty initializer in this file,
+ * so the SF entry's size is sizeof() of an empty array.
+ * The last member of every entry starts out as NULL (presumably the
+ * kernel's buffer object -- confirm against struct i965_kernel).
+ */
+static struct i965_kernel render_kernels_gen7[] = {
+    {
+        "SF",
+        SF_KERNEL,
+        sf_kernel_static_gen7,
+        sizeof(sf_kernel_static_gen7),
+        NULL
+    },
+    {
+        "PS",
+        PS_KERNEL,
+        ps_kernel_static_gen7,
+        sizeof(ps_kernel_static_gen7),
+        NULL
+    },
+
+    {
+        "PS_SUBPIC",
+        PS_SUBPIC_KERNEL,
+        ps_subpic_kernel_static_gen7,
+        sizeof(ps_subpic_kernel_static_gen7),
+        NULL
+    }
+};
+
 #define URB_VS_ENTRIES       8
 #define URB_VS_ENTRY_SIZE     1
 
@@ -593,55 +638,114 @@ i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tilin
 }
 
 static void
-i965_render_src_surface_state(VADriverContextP ctx, 
-                              int index,
-                              dri_bo *region,
-                              unsigned long offset,
-                              int w, int h,
+i965_render_set_surface_state(struct i965_surface_state *ss,
+                              dri_bo *bo, unsigned long offset,
+                              int width, int height,
                               int pitch, int format)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);  
-    struct i965_render_state *render_state = &i965->render_state;
-    struct i965_surface_state *ss;
-    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
     unsigned int tiling;
     unsigned int swizzle;
 
-    assert(index < MAX_RENDER_SURFACES);
-
-    dri_bo_map(ss_bo, 1);
-    assert(ss_bo->virtual);
-    ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
     memset(ss, 0, sizeof(*ss));
     ss->ss0.surface_type = I965_SURFACE_2D;
     ss->ss0.surface_format = format;
-    ss->ss0.writedisable_alpha = 0;
-    ss->ss0.writedisable_red = 0;
-    ss->ss0.writedisable_green = 0;
-    ss->ss0.writedisable_blue = 0;
     ss->ss0.color_blend = 1;
-    ss->ss0.vert_line_stride = 0;
-    ss->ss0.vert_line_stride_ofs = 0;
-    ss->ss0.mipmap_layout_mode = 0;
-    ss->ss0.render_cache_read_mode = 0;
 
-    ss->ss1.base_addr = region->offset + offset;
+    ss->ss1.base_addr = bo->offset + offset;
 
-    ss->ss2.width = w - 1;
-    ss->ss2.height = h - 1;
-    ss->ss2.mip_count = 0;
-    ss->ss2.render_target_rotation = 0;
+    ss->ss2.width = width - 1;
+    ss->ss2.height = height - 1;
 
     ss->ss3.pitch = pitch - 1;
 
-    dri_bo_get_tiling(region, &tiling, &swizzle);
+    dri_bo_get_tiling(bo, &tiling, &swizzle);
     i965_render_set_surface_tiling(ss, tiling);
+}
+
+/*
+ * Translate a buffer tiling mode into the tiling fields of a GEN7 surface
+ * state (ss0.tiled_surface and ss0.tile_walk).
+ *
+ * A tiling value other than NONE, X or Y leaves both fields untouched.
+ */
+static void
+gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
+{
+    if (tiling == I915_TILING_NONE) {
+        /* untiled: clear both fields */
+        ss->ss0.tiled_surface = 0;
+        ss->ss0.tile_walk = 0;
+    } else if (tiling == I915_TILING_X) {
+        ss->ss0.tiled_surface = 1;
+        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+    } else if (tiling == I915_TILING_Y) {
+        ss->ss0.tiled_surface = 1;
+        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+    }
+}
+
+/*
+ * Initialise a GEN7 surface state for a 2D surface stored in a buffer object.
+ *
+ * ss      surface state to fill; it is zeroed before any field is set
+ * bo      buffer object holding the surface
+ * offset  added to bo->offset to form the base address
+ * width   surface width; stored as width - 1
+ * height  surface height; stored as height - 1
+ * pitch   surface pitch; stored as pitch - 1
+ * format  value written to the surface format field
+ *
+ * The tiling fields are derived from the tiling mode reported for bo.
+ */
+static void
+gen7_render_set_surface_state(struct gen7_surface_state *ss,
+                              dri_bo *bo, unsigned long offset,
+                              int width, int height,
+                              int pitch, int format)
+{
+    unsigned int tiling_mode;
+    unsigned int swizzle_mode; /* reported by dri_bo_get_tiling, not used here */
+
+    memset(ss, 0, sizeof(*ss));
+
+    /* type and format */
+    ss->ss0.surface_format = format;
+    ss->ss0.surface_type = I965_SURFACE_2D;
+
+    /* location */
+    ss->ss1.base_addr = bo->offset + offset;
+
+    /* geometry: all three fields hold "value minus one" */
+    ss->ss3.pitch = pitch - 1;
+    ss->ss2.height = height - 1;
+    ss->ss2.width = width - 1;
+
+    /* tiling */
+    dri_bo_get_tiling(bo, &tiling_mode, &swizzle_mode);
+    gen7_render_set_surface_tiling(ss, tiling_mode);
+}
+
+static void
+i965_render_src_surface_state(VADriverContextP ctx, 
+                              int index,
+                              dri_bo *region,
+                              unsigned long offset,
+                              int w, int h,
+                              int pitch, int format)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);  
+    struct i965_render_state *render_state = &i965->render_state;
+    void *ss;
+    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
+
+    assert(index < MAX_RENDER_SURFACES);
 
-    dri_bo_emit_reloc(ss_bo,
-                      I915_GEM_DOMAIN_SAMPLER, 0,
-                      offset,
-                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
-                      region);
+    dri_bo_map(ss_bo, 1);
+    assert(ss_bo->virtual);
+    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);
+
+    if (IS_GEN7(i965->intel.device_id)) {
+        gen7_render_set_surface_state(ss,
+                                      region, offset,
+                                      w, h,
+                                      pitch, format);
+        dri_bo_emit_reloc(ss_bo,
+                          I915_GEM_DOMAIN_SAMPLER, 0,
+                          offset,
+                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
+                          region);
+    } else {
+        i965_render_set_surface_state(ss,
+                                      region, offset,
+                                      w, h,
+                                      pitch, format);
+        dri_bo_emit_reloc(ss_bo,
+                          I915_GEM_DOMAIN_SAMPLER, 0,
+                          offset,
+                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
+                          region);
+    }
 
     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
     dri_bo_unmap(ss_bo);
@@ -729,49 +833,42 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index)
     struct i965_driver_data *i965 = i965_driver_data(ctx);  
     struct i965_render_state *render_state = &i965->render_state;
     struct intel_region *dest_region = render_state->draw_region;
-    struct i965_surface_state *ss;
+    void *ss;
     dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
-
+    int format;
     assert(index < MAX_RENDER_SURFACES);
 
-    dri_bo_map(ss_bo, 1);
-    assert(ss_bo->virtual);
-    ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
-    memset(ss, 0, sizeof(*ss));
-
-    ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.data_return_format = I965_SURFACERETURNFORMAT_FLOAT32;
-
     if (dest_region->cpp == 2) {
-       ss->ss0.surface_format = I965_SURFACEFORMAT_B5G6R5_UNORM;
-       } else {
-       ss->ss0.surface_format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
+       format = I965_SURFACEFORMAT_B5G6R5_UNORM;
+    } else {
+       format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
     }
 
-    ss->ss0.writedisable_alpha = 0;
-    ss->ss0.writedisable_red = 0;
-    ss->ss0.writedisable_green = 0;
-    ss->ss0.writedisable_blue = 0;
-    ss->ss0.color_blend = 1;
-    ss->ss0.vert_line_stride = 0;
-    ss->ss0.vert_line_stride_ofs = 0;
-    ss->ss0.mipmap_layout_mode = 0;
-    ss->ss0.render_cache_read_mode = 0;
-
-    ss->ss1.base_addr = dest_region->bo->offset;
-
-    ss->ss2.width = dest_region->width - 1;
-    ss->ss2.height = dest_region->height - 1;
-    ss->ss2.mip_count = 0;
-    ss->ss2.render_target_rotation = 0;
-    ss->ss3.pitch = dest_region->pitch - 1;
-    i965_render_set_surface_tiling(ss, dest_region->tiling);
-
-    dri_bo_emit_reloc(ss_bo,
-                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                      0,
-                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
-                      dest_region->bo);
+    dri_bo_map(ss_bo, 1);
+    assert(ss_bo->virtual);
+    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);
+
+    if (IS_GEN7(i965->intel.device_id)) {
+        gen7_render_set_surface_state(ss,
+                                      dest_region->bo, 0,
+                                      dest_region->width, dest_region->height,
+                                      dest_region->pitch, format);
+        dri_bo_emit_reloc(ss_bo,
+                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                          0,
+                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
+                          dest_region->bo);
+    } else {
+        i965_render_set_surface_state(ss,
+                                      dest_region->bo, 0,
+                                      dest_region->width, dest_region->height,
+                                      dest_region->pitch, format);
+        dri_bo_emit_reloc(ss_bo,
+                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                          0,
+                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
+                          dest_region->bo);
+    }
 
     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
     dri_bo_unmap(ss_bo);
@@ -2094,81 +2191,884 @@ gen6_render_put_subpicture(VADriverContextP ctx,
 }
 
 /*
- * global functions
+ * for GEN7
  */
-void
-intel_render_put_surface(VADriverContextP ctx,
-                        VASurfaceID surface,
-                        short srcx,
-                        short srcy,
-                        unsigned short srcw,
-                        unsigned short srch,
-                        short destx,
-                        short desty,
-                        unsigned short destw,
-                        unsigned short desth,
-                        unsigned int flag)
+/*
+ * (Re)allocate the buffer objects used for GEN7 rendering.
+ *
+ * For each of the vertex buffer, surface state & binding table, sampler
+ * state, color calc state, cc viewport, blend state and depth & stencil
+ * state, the previous reference held in render_state is dropped and a new
+ * bo is allocated from i965->intel.bufmgr. Allocation failure is only
+ * caught by assert(). The sampler count is reset to 0.
+ */
+static void 
+gen7_render_initialize(VADriverContextP ctx)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    dri_bo *bo;
 
-    i965_post_processing(ctx, surface,
-                         srcx, srcy, srcw, srch,
-                         destx, desty, destw, desth,
-                         flag);
+    /* VERTEX BUFFER */
+    dri_bo_unreference(render_state->vb.vertex_buffer);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "vertex buffer",
+                      4096,
+                      4096);
+    assert(bo);
+    render_state->vb.vertex_buffer = bo;
 
-    if (IS_GEN6(i965->intel.device_id) ||
-        IS_GEN7(i965->intel.device_id))
-        gen6_render_put_surface(ctx, surface,
-                                srcx, srcy, srcw, srch,
-                                destx, desty, destw, desth,
-                                flag);
-    else
-        i965_render_put_surface(ctx, surface,
-                                srcx, srcy, srcw, srch,
-                                destx, desty, destw, desth,
-                                flag);
+    /* WM */
+    /* one padded surface state plus one binding table entry per surface */
+    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "surface state & binding table",
+                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
+                      4096);
+    assert(bo);
+    render_state->wm.surface_state_binding_table_bo = bo;
+
+    dri_bo_unreference(render_state->wm.sampler);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "sampler state",
+                      MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
+                      4096);
+    assert(bo);
+    render_state->wm.sampler = bo;
+    render_state->wm.sampler_count = 0;
+
+    /* COLOR CALCULATOR */
+    dri_bo_unreference(render_state->cc.state);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "color calc state",
+                      sizeof(struct gen6_color_calc_state),
+                      4096);
+    assert(bo);
+    render_state->cc.state = bo;
+
+    /* CC VIEWPORT */
+    dri_bo_unreference(render_state->cc.viewport);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "cc viewport",
+                      sizeof(struct i965_cc_viewport),
+                      4096);
+    assert(bo);
+    render_state->cc.viewport = bo;
+
+    /* BLEND STATE */
+    dri_bo_unreference(render_state->cc.blend);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "blend state",
+                      sizeof(struct gen6_blend_state),
+                      4096);
+    assert(bo);
+    render_state->cc.blend = bo;
+
+    /* DEPTH & STENCIL STATE */
+    dri_bo_unreference(render_state->cc.depth_stencil);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "depth & stencil state",
+                      sizeof(struct gen6_depth_stencil_state),
+                      4096);
+    assert(bo);
+    render_state->cc.depth_stencil = bo;
 }
 
-void
-intel_render_put_subpicture(VADriverContextP ctx,
-                           VASurfaceID surface,
-                           short srcx,
-                           short srcy,
-                           unsigned short srcw,
-                           unsigned short srch,
-                           short destx,
-                           short desty,
-                           unsigned short destw,
-                           unsigned short desth)
+/*
+ * Fill the color calculator state bo: zero the whole structure, then set
+ * the constant color to r = 1.0, g = 0.0, b = 1.0, a = 1.0.
+ * The GEN6 structure layout (struct gen6_color_calc_state) is used.
+ */
+static void
+gen7_render_color_calc_state(VADriverContextP ctx)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    struct gen6_color_calc_state *color_calc_state;
+    
+    dri_bo_map(render_state->cc.state, 1);
+    assert(render_state->cc.state->virtual);
+    color_calc_state = render_state->cc.state->virtual;
+    memset(color_calc_state, 0, sizeof(*color_calc_state));
+    color_calc_state->constant_r = 1.0;
+    /* NOTE(review): green is 0.0 while r/b/a are 1.0 -- confirm this is intended */
+    color_calc_state->constant_g = 0.0;
+    color_calc_state->constant_b = 1.0;
+    color_calc_state->constant_a = 1.0;
+    dri_bo_unmap(render_state->cc.state);
+}
 
-    if (IS_GEN6(i965->intel.device_id) ||
-        IS_GEN7(i965->intel.device_id))
-        gen6_render_put_subpicture(ctx, surface,
-                                   srcx, srcy, srcw, srch,
-                                   destx, desty, destw, desth);
-    else
-        i965_render_put_subpicture(ctx, surface,
-                                   srcx, srcy, srcw, srch,
-                                   destx, desty, destw, desth);
+/*
+ * Fill the blend state bo: zero the structure, then enable logic ops with
+ * function 0xc and enable pre-blend clamping. Every other field stays 0.
+ * The GEN6 structure layout (struct gen6_blend_state) is used.
+ */
+static void
+gen7_render_blend_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    struct gen6_blend_state *blend_state;
+    
+    dri_bo_map(render_state->cc.blend, 1);
+    assert(render_state->cc.blend->virtual);
+    blend_state = render_state->cc.blend->virtual;
+    memset(blend_state, 0, sizeof(*blend_state));
+    blend_state->blend1.logic_op_enable = 1;
+    /* presumably 0xc selects the COPY logic op -- confirm against the hardware docs */
+    blend_state->blend1.logic_op_func = 0xc;
+    blend_state->blend1.pre_blend_clamp_enable = 1;
+    dri_bo_unmap(render_state->cc.blend);
 }
 
-Bool 
-i965_render_init(VADriverContextP ctx)
+/*
+ * Fill the depth & stencil state bo with zeros; no field is set afterwards.
+ * The GEN6 structure layout (struct gen6_depth_stencil_state) is used.
+ */
+static void
+gen7_render_depth_stencil_state(VADriverContextP ctx)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct i965_render_state *render_state = &i965->render_state;
-    int i;
+    struct gen6_depth_stencil_state *depth_stencil_state;
+    
+    dri_bo_map(render_state->cc.depth_stencil, 1);
+    assert(render_state->cc.depth_stencil->virtual);
+    depth_stencil_state = render_state->cc.depth_stencil->virtual;
+    memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
+    dri_bo_unmap(render_state->cc.depth_stencil);
+}
 
-    /* kernel */
-    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
-                                 sizeof(render_kernels_gen5[0])));
-    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
-                                 sizeof(render_kernels_gen6[0])));
+/*
+ * Initialise the first wm.sampler_count entries of the sampler state bo.
+ *
+ * Each entry is zeroed, then given linear min/mag filters and CLAMP wrap
+ * mode on the r, s and t coordinates. The count must already be set by the
+ * caller's earlier setup and lie in 1..MAX_SAMPLERS (checked by assert only).
+ */
+static void 
+gen7_render_sampler(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    struct gen7_sampler_state *sampler_state;
+    int i;
+    
+    assert(render_state->wm.sampler_count > 0);
+    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
 
-    if (IS_GEN6(i965->intel.device_id) ||
-        IS_GEN7(i965->intel.device_id))
+    dri_bo_map(render_state->wm.sampler, 1);
+    assert(render_state->wm.sampler->virtual);
+    sampler_state = render_state->wm.sampler->virtual;
+    for (i = 0; i < render_state->wm.sampler_count; i++) {
+        memset(sampler_state, 0, sizeof(*sampler_state));
+        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
+        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
+        sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
+        sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
+        sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
+        sampler_state++;
+    }
+
+    dri_bo_unmap(render_state->wm.sampler);
+}
+
+/*
+ * Build all state needed to render `surface` on GEN7.
+ *
+ * Calls, in order: destination surface state (index 0), source surface
+ * states, sampler state, CC viewport, color calc state, blend state,
+ * depth/stencil state, constant upload and vertex upload. Only the vertex
+ * upload consumes the source and destination rectangles.
+ * gen7_render_sampler() asserts a non-zero sampler count, so it has to run
+ * after whatever sets wm.sampler_count (presumably the source surface
+ * setup -- confirm).
+ */
+static void
+gen7_render_setup_states(VADriverContextP ctx,
+                         VASurfaceID surface,
+                         short srcx,
+                         short srcy,
+                         unsigned short srcw,
+                         unsigned short srch,
+                         short destx,
+                         short desty,
+                         unsigned short destw,
+                         unsigned short desth)
+{
+    i965_render_dest_surface_state(ctx, 0);
+    i965_render_src_surfaces_state(ctx, surface);
+    gen7_render_sampler(ctx);
+    i965_render_cc_viewport(ctx);
+    gen7_render_color_calc_state(ctx);
+    gen7_render_blend_state(ctx);
+    gen7_render_depth_stencil_state(ctx);
+    i965_render_upload_constants(ctx);
+    i965_render_upload_vertex(ctx, surface,
+                              srcx, srcy, srcw, srch,
+                              destx, desty, destw, desth);
+}
+
+/*
+ * Emit the commands that do not depend on the surface being rendered:
+ * select the 3D pipeline, program 3DSTATE_MULTISAMPLE for one sample per
+ * pixel with the pixel location at the center, set the sample mask to 1,
+ * and set the system instruction pointer to 0.
+ */
+static void
+gen7_emit_invarient_states(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+
+    BEGIN_BATCH(batch, 1);
+    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 4);
+    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
+    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
+              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
+    OUT_BATCH(batch, 1);
+    ADVANCE_BATCH(batch);
+
+    /* Set system instruction pointer */
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, CMD_STATE_SIP | 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit STATE_BASE_ADDRESS (10 dwords) for GEN7.
+ *
+ * The surface state base address is a relocation to the surface state &
+ * binding table bo; every other base address and upper bound is written as
+ * BASE_ADDRESS_MODIFY alone, i.e. with no address bits set.
+ *
+ * The dwords are bracketed with BEGIN_BATCH/ADVANCE_BATCH, as in the other
+ * gen7_emit_* helpers of this file, so that the batch reservation is made
+ * before the command is written.
+ */
+static void
+gen7_emit_state_base_address(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    struct i965_render_state *render_state = &i965->render_state;
+
+    BEGIN_BATCH(batch, 10);
+    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
+    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
+    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
+    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
+    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
+    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
+    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
+    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
+    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
+    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the GEN7 viewport state pointers: the CC viewport pointer is a
+ * relocation to render_state->cc.viewport (delta 0), and the SF/CL
+ * viewport pointer is written as 0.
+ */
+static void
+gen7_emit_viewport_state_pointers(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    struct i965_render_state *render_state = &i965->render_state;
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
+    OUT_RELOC(batch,
+              render_state->cc.viewport,
+              I915_GEM_DOMAIN_INSTRUCTION, 0,
+              0);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * URB layout on GEN7
+ * ----------------------------------------
+ * | PS Push Constants (8KB) | VS entries |
+ * ----------------------------------------
+ *
+ * Emits the PS push constant allocation (8, in 1KB units) followed by the
+ * URB allocation for each stage:
+ *   VS: 32 entries, entry size field (2 - 1), starting address 1
+ *   GS: entry count and size fields 0, starting address 1
+ *   HS: entry count and size fields 0, starting address 2
+ *   DS: entry count and size fields 0, starting address 2
+ * NOTE(review): the starting address is presumably in 8KB units, which
+ * would place the VS entries right after the push constants -- confirm
+ * against the hardware docs.
+ */
+static void
+gen7_emit_urb(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
+    OUT_BATCH(batch, 8); /* in 1KBs */
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
+    OUT_BATCH(batch, 
+              (32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */
+              (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
+              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+   ADVANCE_BATCH(batch);
+
+   /* the remaining stages get no URB entries */
+   BEGIN_BATCH(batch, 2);
+   OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
+   OUT_BATCH(batch,
+             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+             (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+   ADVANCE_BATCH(batch);
+
+   BEGIN_BATCH(batch, 2);
+   OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
+   OUT_BATCH(batch,
+             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+             (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+   ADVANCE_BATCH(batch);
+
+   BEGIN_BATCH(batch, 2);
+   OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
+   OUT_BATCH(batch,
+             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+             (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+   ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the pointers to the color calculator, blend and depth/stencil
+ * state.
+ *
+ * Each command is 2 dwords long; the second dword is a relocation against
+ * the matching buffer object in render_state->cc (state, blend,
+ * depth_stencil) with a delta of 1.
+ *
+ * NOTE(review): the delta of 1 presumably sets a flag in bit 0 of the
+ * pointer dword rather than a byte offset -- confirm against the PRM.
+ */
+static void
+gen7_emit_cc_state_pointers(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    struct i965_render_state *render_state = &i965->render_state;
+
+    /* color calculator state */
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
+    OUT_RELOC(batch,
+              render_state->cc.state,
+              I915_GEM_DOMAIN_INSTRUCTION, 0,
+              1);
+    ADVANCE_BATCH(batch);
+
+    /* blend state */
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
+    OUT_RELOC(batch,
+              render_state->cc.blend,
+              I915_GEM_DOMAIN_INSTRUCTION, 0,
+              1);
+    ADVANCE_BATCH(batch);
+
+    /* depth/stencil state */
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
+    OUT_RELOC(batch,
+              render_state->cc.depth_stencil,
+              I915_GEM_DOMAIN_INSTRUCTION, 0, 
+              1);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the pixel shader sampler state pointer.
+ *
+ * A single 2-dword command whose second dword is a relocation against
+ * render_state->wm.sampler with a delta of 0.
+ */
+static void
+gen7_emit_sampler_state_pointers(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    struct i965_render_state *render_state = &i965->render_state;
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
+    OUT_RELOC(batch,
+              render_state->wm.sampler,
+              I915_GEM_DOMAIN_INSTRUCTION, 0,
+              0);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the pixel shader binding table pointer.
+ *
+ * The pointer dword is the constant BINDING_TABLE_OFFSET, written as a
+ * plain value (no relocation).
+ *
+ * NOTE(review): presumably the offset is relative to the surface state
+ * base address programmed elsewhere -- confirm.
+ */
+static void
+gen7_emit_binding_table(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
+    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the depth buffer state and the clear parameters.
+ *
+ * The 7-dword depth buffer command selects surface type I965_SURFACE_NULL
+ * with format I965_DEPTHFORMAT_D32_FLOAT and leaves every other dword at
+ * 0. It is followed by a 3-dword clear-params command whose payload is
+ * all zeros.
+ */
+static void
+gen7_emit_depth_buffer_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+
+    BEGIN_BATCH(batch, 7);
+    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
+    OUT_BATCH(batch,
+              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
+              (I965_SURFACE_NULL << 29));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 3);
+    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the drawing rectangle. GEN7 has no code of its own here: the call
+ * is forwarded unchanged to i965_render_drawing_rectangle().
+ */
+static void
+gen7_emit_drawing_rectangle(VADriverContextP ctx)
+{
+    i965_render_drawing_rectangle(ctx);
+}
+
+/*
+ * Emit the vertex shader stage state with no VS kernel (pass-through).
+ *
+ * Two commands are written: a 7-dword VS constant command with an
+ * all-zero payload (constant buffer disabled) and a 6-dword VS state
+ * command with an all-zero payload (no kernel).
+ *
+ * Each command is bracketed by BEGIN_BATCH/ADVANCE_BATCH like every other
+ * emitter in this file, so the declared dword count is reserved up front
+ * and checked against what was actually written.
+ */
+static void
+gen7_emit_vs_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+
+    /* disable VS constant buffer */
+    BEGIN_BATCH(batch, 7);
+    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 6);
+    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
+    OUT_BATCH(batch, 0); /* without VS kernel */
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0); /* pass-through */
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit all-zero state for the pipeline stages the renderer does not use.
+ *
+ * In order: GS constants, GS state and GS binding table pointer; HS
+ * constants, HS state and HS binding table pointer; TE state; DS
+ * constants, DS state and DS binding table pointer; and finally the
+ * STREAMOUT state. Every payload dword of every command is 0.
+ */
+static void 
+gen7_emit_bypass_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+
+    /* bypass GS */
+    BEGIN_BATCH(batch, 7);
+    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 7);     
+    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
+    OUT_BATCH(batch, 0); /* without GS kernel */
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0); /* pass-through */
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    /* disable HS */
+    BEGIN_BATCH(batch, 7);
+    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 7);
+    OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    /* Disable TE */
+    BEGIN_BATCH(batch, 4);
+    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    /* Disable DS */
+    BEGIN_BATCH(batch, 7);
+    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 6);
+    OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    /* Disable STREAMOUT */
+    BEGIN_BATCH(batch, 3);
+    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the clip stage state as a 4-dword command with an all-zero
+ * payload (pass-through).
+ *
+ * The command is bracketed by BEGIN_BATCH/ADVANCE_BATCH like the other
+ * emitters in this file, so the declared dword count is reserved up front
+ * and checked against what was actually written.
+ */
+static void
+gen7_emit_clip_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+
+    BEGIN_BATCH(batch, 4);
+    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0); /* pass-through */
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the setup backend (SBE) and strips-and-fans (SF) state.
+ *
+ * The 14-dword SBE command programs 1 output, a URB entry read length of
+ * 1 and a read offset of 0; the remaining dwords are 0. The 7-dword SF
+ * command disables culling (GEN6_3DSTATE_SF_CULL_NONE) and sets the
+ * trifan provoking-vertex field to 2; the remaining dwords are 0.
+ */
+static void 
+gen7_emit_sf_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+
+    BEGIN_BATCH(batch, 14);
+    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
+    OUT_BATCH(batch,
+              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
+              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
+              (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0); /* DW4 */
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0); /* DW9 */
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 7);
+    OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
+    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the windower (WM), PS constant and pixel shader (PS) state.
+ *
+ * ctx:    driver context.
+ * kernel: index into render_state->render_kernels selecting the pixel
+ *         shader whose buffer object is used as the kernel pointer.
+ *
+ * Three commands are written:
+ *  - a 3-dword WM command enabling dispatch with perspective pixel
+ *    barycentric interpolation;
+ *  - a 7-dword PS constant command whose DW1 is 1 and whose DW3 is a
+ *    relocation against render_state->curbe.bo;
+ *  - an 8-dword PS command pointing at the selected kernel, with a
+ *    sampler count field of 1, a binding table entry count of 5, a max
+ *    threads field of (86 - 1), push constants, attributes and 16-wide
+ *    dispatch enabled, and a dispatch start GRF of 6 for kernel 0.
+ *
+ * NOTE(review): DW1 == 1 of the constant command is presumably the read
+ * length of constant buffer 0 -- confirm against the PRM.
+ */
+static void 
+gen7_emit_wm_state(VADriverContextP ctx, int kernel)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    struct i965_render_state *render_state = &i965->render_state;
+
+    BEGIN_BATCH(batch, 3);
+    OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
+    OUT_BATCH(batch,
+              GEN7_WM_DISPATCH_ENABLE |
+              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    /* PS push constants are sourced from the curbe buffer object */
+    BEGIN_BATCH(batch, 7);
+    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
+    OUT_BATCH(batch, 1);
+    OUT_BATCH(batch, 0);
+    OUT_RELOC(batch, 
+              render_state->curbe.bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0,
+              0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 8);
+    OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
+    OUT_RELOC(batch, 
+              render_state->render_kernels[kernel].bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0,
+              0);
+    OUT_BATCH(batch, 
+              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
+              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+    OUT_BATCH(batch, 0); /* scratch space base offset */
+    OUT_BATCH(batch, 
+              ((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
+              GEN7_PS_PUSH_CONSTANT_ENABLE |
+              GEN7_PS_ATTRIBUTE_ENABLE |
+              GEN7_PS_16_DISPATCH_ENABLE);
+    OUT_BATCH(batch, 
+              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
+    OUT_BATCH(batch, 0); /* kernel 1 pointer */
+    OUT_BATCH(batch, 0); /* kernel 2 pointer */
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the vertex element layout: two R32G32_FLOAT elements read from
+ * vertex buffer 0.
+ *
+ * Element 0 is at byte offset 0 and holds the position (X, Y); element 1
+ * is at byte offset 8 and holds the texture coordinate (S0, T0). For both,
+ * components 0 and 1 are taken from the source data and components 2 and
+ * 3 are stored as 1.0.
+ *
+ * The 5-dword command is bracketed by BEGIN_BATCH/ADVANCE_BATCH like the
+ * other emitters in this file, so the declared dword count is reserved up
+ * front and checked against what was actually written.
+ */
+static void
+gen7_emit_vertex_element_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+
+    /* Set up our vertex elements, sourced from the single vertex buffer. */
+    BEGIN_BATCH(batch, 5);
+    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
+    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
+    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+              GEN6_VE0_VALID |
+              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+              (0 << VE0_OFFSET_SHIFT));
+    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
+    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+              GEN6_VE0_VALID |
+              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+              (8 << VE0_OFFSET_SHIFT));
+    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Bind the vertex buffer and issue the draw.
+ *
+ * The 5-dword vertex buffer command binds render_state->vb.vertex_buffer
+ * as buffer 0 with a pitch of 4 * 4 bytes; it carries two relocations
+ * against that buffer, at deltas 0 and 12 * 4. The 7-dword primitive
+ * command then draws one instance of a 3-vertex RECTLIST with sequential
+ * vertex buffer access, starting at vertex 0.
+ *
+ * NOTE(review): the second relocation (delta 12 * 4 = 48 bytes, i.e.
+ * 3 vertices * 16 bytes) is presumably the buffer end address -- confirm.
+ */
+static void
+gen7_emit_vertices(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    struct i965_render_state *render_state = &i965->render_state;
+
+    BEGIN_BATCH(batch, 5);
+    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
+    OUT_BATCH(batch, 
+              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
+              GEN6_VB0_VERTEXDATA |
+              GEN7_VB0_ADDRESS_MODIFYENABLE |
+              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
+    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
+    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    BEGIN_BATCH(batch, 7);
+    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
+    OUT_BATCH(batch,
+              _3DPRIM_RECTLIST |
+              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
+    OUT_BATCH(batch, 3); /* vertex count per instance */
+    OUT_BATCH(batch, 0); /* start vertex offset */
+    OUT_BATCH(batch, 1); /* single instance */
+    OUT_BATCH(batch, 0); /* start instance location */
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the complete GEN7 render command sequence for one draw.
+ *
+ * ctx:    driver context.
+ * kernel: pixel shader index, passed through to gen7_emit_wm_state().
+ *
+ * Everything is emitted inside one atomic batch section opened with a
+ * size argument of 0x1000: an MI flush, the invariant state, the state
+ * base addresses, every pipeline stage's state, and finally the vertex
+ * buffer binding and draw call issued by gen7_emit_vertices().
+ */
+static void
+gen7_render_emit_states(VADriverContextP ctx, int kernel)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+
+    intel_batchbuffer_start_atomic(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
+    gen7_emit_invarient_states(ctx);
+    gen7_emit_state_base_address(ctx);
+    gen7_emit_viewport_state_pointers(ctx);
+    gen7_emit_urb(ctx);
+    gen7_emit_cc_state_pointers(ctx);
+    gen7_emit_sampler_state_pointers(ctx);
+    gen7_emit_bypass_state(ctx);
+    gen7_emit_vs_state(ctx);
+    gen7_emit_clip_state(ctx);
+    gen7_emit_sf_state(ctx);
+    gen7_emit_wm_state(ctx, kernel);
+    gen7_emit_binding_table(ctx);
+    gen7_emit_depth_buffer_state(ctx);
+    gen7_emit_drawing_rectangle(ctx);
+    gen7_emit_vertex_element_state(ctx);
+    gen7_emit_vertices(ctx); /* binds the vertex buffer and draws */
+    intel_batchbuffer_end_atomic(batch);
+}
+
+/*
+ * Render a video surface to the destination on GEN7.
+ *
+ * Steps, in order: initialize the render state, build the state objects
+ * for the given source and destination rectangles, clear the destination
+ * region, emit the command sequence using the PS_KERNEL pixel shader, and
+ * flush the batch buffer. The flag argument is not used here.
+ */
+static void
+gen7_render_put_surface(VADriverContextP ctx,
+                        VASurfaceID surface,
+                        short srcx,
+                        short srcy,
+                        unsigned short srcw,
+                        unsigned short srch,
+                        short destx,
+                        short desty,
+                        unsigned short destw,
+                        unsigned short desth,
+                        unsigned int flag)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+
+    /* state setup */
+    gen7_render_initialize(ctx);
+    gen7_render_setup_states(ctx,
+                             surface,
+                             srcx,
+                             srcy,
+                             srcw,
+                             srch,
+                             destx,
+                             desty,
+                             destw,
+                             desth);
+
+    /* clear, draw, submit */
+    i965_clear_dest_region(ctx);
+    gen7_render_emit_states(ctx, PS_KERNEL);
+    intel_batchbuffer_flush(batch);
+}
+
+/*
+ * Fill in the blend state used when compositing a subpicture.
+ *
+ * The blend buffer object (render_state->cc.blend) is mapped for writing,
+ * zeroed, and programmed for source-alpha blending: ADD with factors
+ * SRC_ALPHA (source) and INV_SRC_ALPHA (destination), with pre- and
+ * post-blend clamping to [0, 1]. The buffer is unmapped before returning.
+ *
+ * Only cc.blend is mapped here, so only cc.blend is unmapped. The stray
+ * dri_bo_unmap() of cc.state that used to precede the map has been
+ * dropped, because this function never maps that buffer.
+ */
+static void
+gen7_subpicture_render_blend_state(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    struct gen6_blend_state *blend_state;
+
+    dri_bo_map(render_state->cc.blend, 1);
+    assert(render_state->cc.blend->virtual);
+    blend_state = render_state->cc.blend->virtual;
+    memset(blend_state, 0, sizeof(*blend_state));
+    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
+    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
+    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
+    blend_state->blend0.blend_enable = 1;
+    blend_state->blend1.post_blend_clamp_enable = 1;
+    blend_state->blend1.pre_blend_clamp_enable = 1;
+    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
+    dri_bo_unmap(render_state->cc.blend);
+}
+
+/*
+ * Build every state object needed to draw a subpicture on GEN7.
+ *
+ * The destination rectangle is packed into a VARectangle and handed to
+ * the vertex upload; the source rectangle arguments are not used here.
+ * The helpers run in this order: destination surface state (index 0),
+ * subpicture source surface state, sampler, CC viewport, color calc
+ * state, subpicture blend state, depth/stencil state, vertex upload.
+ */
+static void
+gen7_subpicture_render_setup_states(VADriverContextP ctx,
+                                    VASurfaceID surface,
+                                    short srcx,
+                                    short srcy,
+                                    unsigned short srcw,
+                                    unsigned short srch,
+                                    short destx,
+                                    short desty,
+                                    unsigned short destw,
+                                    unsigned short desth)
+{
+    VARectangle dst_rect;
+
+    /* surfaces and fixed-function state */
+    i965_render_dest_surface_state(ctx, 0);
+    i965_subpic_render_src_surfaces_state(ctx, surface);
+    i965_render_sampler(ctx);
+    i965_render_cc_viewport(ctx);
+    gen7_render_color_calc_state(ctx);
+    gen7_subpicture_render_blend_state(ctx);
+    gen7_render_depth_stencil_state(ctx);
+
+    /* geometry: where on the destination the subpicture lands */
+    dst_rect.width  = destw;
+    dst_rect.height = desth;
+    dst_rect.x      = destx;
+    dst_rect.y      = desty;
+    i965_subpic_render_upload_vertex(ctx, surface, &dst_rect);
+}
+
+/*
+ * Render the subpicture attached to a surface on GEN7.
+ *
+ * The surface object is looked up from the surface ID and the subpicture
+ * object from the surface's subpic field. Both lookups are asserted to
+ * succeed; the surface is now checked before its subpic field is read,
+ * so a bad ID hits an assertion instead of a NULL dereference.
+ *
+ * Steps, in order: initialize the render state, build the subpicture
+ * state objects, emit the command sequence using the PS_SUBPIC_KERNEL
+ * pixel shader, upload the image palette with 0xff as the last argument,
+ * and flush the batch buffer.
+ */
+static void
+gen7_render_put_subpicture(VADriverContextP ctx,
+                           VASurfaceID surface,
+                           short srcx,
+                           short srcy,
+                           unsigned short srcw,
+                           unsigned short srch,
+                           short destx,
+                           short desty,
+                           unsigned short destw,
+                           unsigned short desth)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = i965->batch;
+    struct object_surface *obj_surface = SURFACE(surface);
+    struct object_subpic *obj_subpic;
+
+    /* validate the surface before reading its subpic field */
+    assert(obj_surface);
+    obj_subpic = SUBPIC(obj_surface->subpic);
+    assert(obj_subpic);
+
+    gen7_render_initialize(ctx);
+    gen7_subpicture_render_setup_states(ctx, surface,
+                                        srcx, srcy, srcw, srch,
+                                        destx, desty, destw, desth);
+    gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
+    i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
+    intel_batchbuffer_flush(batch);
+}
+
+
+/*
+ * global functions
+ */
+/*
+ * Render a video surface, dispatching on the hardware generation.
+ *
+ * i965_post_processing() is always called first with the same arguments.
+ * The call is then forwarded to the GEN7 renderer, the GEN6 renderer, or
+ * the generic i965 renderer, selected from i965->intel.device_id.
+ */
+void
+intel_render_put_surface(VADriverContextP ctx,
+                        VASurfaceID surface,
+                        short srcx,
+                        short srcy,
+                        unsigned short srcw,
+                        unsigned short srch,
+                        short destx,
+                        short desty,
+                        unsigned short destw,
+                        unsigned short desth,
+                        unsigned int flag)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+    i965_post_processing(ctx, surface,
+                         srcx, srcy, srcw, srch,
+                         destx, desty, destw, desth,
+                         flag);
+
+    /* newest generation first */
+    if (IS_GEN7(i965->intel.device_id)) {
+        gen7_render_put_surface(ctx, surface,
+                                srcx, srcy, srcw, srch,
+                                destx, desty, destw, desth,
+                                flag);
+        return;
+    }
+
+    if (IS_GEN6(i965->intel.device_id)) {
+        gen6_render_put_surface(ctx, surface,
+                                srcx, srcy, srcw, srch,
+                                destx, desty, destw, desth,
+                                flag);
+        return;
+    }
+
+    /* everything else takes the generic i965 path */
+    i965_render_put_surface(ctx, surface,
+                            srcx, srcy, srcw, srch,
+                            destx, desty, destw, desth,
+                            flag);
+}
+
+/*
+ * Render a surface's subpicture, dispatching on the hardware generation.
+ *
+ * The call is forwarded unchanged to the GEN7 renderer, the GEN6
+ * renderer, or the generic i965 renderer, selected from
+ * i965->intel.device_id.
+ */
+void
+intel_render_put_subpicture(VADriverContextP ctx,
+                           VASurfaceID surface,
+                           short srcx,
+                           short srcy,
+                           unsigned short srcw,
+                           unsigned short srch,
+                           short destx,
+                           short desty,
+                           unsigned short destw,
+                           unsigned short desth)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+    /* newest generation first */
+    if (IS_GEN7(i965->intel.device_id)) {
+        gen7_render_put_subpicture(ctx, surface,
+                                   srcx, srcy, srcw, srch,
+                                   destx, desty, destw, desth);
+        return;
+    }
+
+    if (IS_GEN6(i965->intel.device_id)) {
+        gen6_render_put_subpicture(ctx, surface,
+                                   srcx, srcy, srcw, srch,
+                                   destx, desty, destw, desth);
+        return;
+    }
+
+    /* everything else takes the generic i965 path */
+    i965_render_put_subpicture(ctx, surface,
+                               srcx, srcy, srcw, srch,
+                               destx, desty, destw, desth);
+}
+
+Bool 
+i965_render_init(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    int i;
+
+    /* kernel */
+    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
+                                 sizeof(render_kernels_gen5[0])));
+    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
+                                 sizeof(render_kernels_gen6[0])));
+
+    if (IS_GEN7(i965->intel.device_id))
+        memcpy(render_state->render_kernels, render_kernels_gen7, sizeof(render_state->render_kernels));
+    else if (IS_GEN6(i965->intel.device_id))
         memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
     else if (IS_IRONLAKE(i965->intel.device_id))
         memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
index 8f81b75..df59e45 100644 (file)
@@ -1128,4 +1128,128 @@ struct gen6_interface_descriptor_data
     } desc7;
 };
 
+/*
+ * GEN7 surface state, laid out as eight 32-bit words (ss0..ss7).
+ *
+ * Each sub-struct is one dword; the bit-field widths in every dword add
+ * up to 32. Fields named pad* are filler between the named fields.
+ *
+ * NOTE(review): the field order presumably runs from bit 0 upwards, which
+ * depends on the compiler's bit-field allocation order -- confirm for the
+ * supported toolchains.
+ */
+struct gen7_surface_state
+{
+    /* ss0: surface type, format, tiling and cube face enables */
+    struct {
+        unsigned int cube_pos_z:1;
+        unsigned int cube_neg_z:1;
+        unsigned int cube_pos_y:1;
+        unsigned int cube_neg_y:1;
+        unsigned int cube_pos_x:1;
+        unsigned int cube_neg_x:1;
+        unsigned int pad2:2;
+        unsigned int render_cache_read_write:1;
+        unsigned int pad1:1;
+        unsigned int surface_array_spacing:1;
+        unsigned int vert_line_stride_ofs:1;
+        unsigned int vert_line_stride:1;
+        unsigned int tile_walk:1;
+        unsigned int tiled_surface:1;
+        unsigned int horizontal_alignment:1;
+        unsigned int vertical_alignment:2;
+        unsigned int surface_format:9;     /**< BRW_SURFACEFORMAT_x */
+        unsigned int pad0:1;
+        unsigned int is_array:1;
+        unsigned int surface_type:3;       /**< BRW_SURFACE_1D/2D/3D/CUBE */
+    } ss0;
+
+    /* ss1: surface base address */
+    struct {
+        unsigned int base_addr;
+    } ss1;
+
+    /* ss2: width and height fields, 14 bits each */
+    struct {
+        unsigned int width:14;
+        unsigned int pad1:2;
+        unsigned int height:14;
+        unsigned int pad0:2;
+    } ss2;
+
+    /* ss3: pitch and depth */
+    struct {
+        unsigned int pitch:18;
+        unsigned int pad:3;
+        unsigned int depth:11;
+    } ss3;
+
+    /* ss4: multisampling, array range and rotation */
+    struct {
+        unsigned int multisample_position_palette_index:3;
+        unsigned int num_multisamples:3;
+        unsigned int multisampled_surface_storage_format:1;
+        unsigned int render_target_view_extent:11;
+        unsigned int min_array_elt:11;
+        unsigned int rotation:2;
+        unsigned int pad0:1;
+    } ss4;
+
+    /* ss5: mip levels and x/y offsets */
+    struct {
+        unsigned int mip_count:4;
+        unsigned int min_lod:4;
+        unsigned int pad1:12;
+        unsigned int y_offset:4;
+        unsigned int pad0:1;
+        unsigned int x_offset:7;
+    } ss5;
+
+    /* ss6: not broken into fields here */
+    struct {
+        unsigned int pad; /* Multisample Control Surface stuff */
+    } ss6;
+
+    /* ss7: resource min LOD and per-channel clear color bits */
+    struct {
+        unsigned int resource_min_lod:12;
+        unsigned int pad0:16;
+        unsigned int alpha_clear_color:1;
+        unsigned int blue_clear_color:1;
+        unsigned int green_clear_color:1;
+        unsigned int red_clear_color:1;
+    } ss7;
+};
+
+/*
+ * GEN7 sampler state, laid out as four 32-bit words (ss0..ss3).
+ *
+ * Each sub-struct is one dword; the bit-field widths in every dword add
+ * up to 32. Fields named pad* are filler between the named fields.
+ *
+ * NOTE(review): the field order presumably runs from bit 0 upwards, which
+ * depends on the compiler's bit-field allocation order -- confirm for the
+ * supported toolchains.
+ */
+struct gen7_sampler_state
+{
+   /* ss0: filter selection, LOD bias and the sampler disable bit */
+   struct
+   {
+      unsigned int aniso_algorithm:1;
+      unsigned int lod_bias:13;
+      unsigned int min_filter:3;
+      unsigned int mag_filter:3;
+      unsigned int mip_filter:2;
+      unsigned int base_level:5;
+      unsigned int pad1:1;
+      unsigned int lod_preclamp:1;
+      unsigned int default_color_mode:1;
+      unsigned int pad0:1;
+      unsigned int disable:1;
+   } ss0;
+
+   /* ss1: LOD range, shadow function and cube control mode */
+   struct
+   {
+      unsigned int cube_control_mode:1;
+      unsigned int shadow_function:3;
+      unsigned int pad:4;
+      unsigned int max_lod:12;
+      unsigned int min_lod:12;
+   } ss1;
+
+   /* ss2: default color pointer; the low 5 bits are padding */
+   struct
+   {
+      unsigned int pad:5;
+      unsigned int default_color_pointer:27;
+   } ss2;
+
+   /* ss3: wrap modes, coordinate handling, anisotropy and chroma key */
+   struct
+   {
+      unsigned int r_wrap_mode:3;
+      unsigned int t_wrap_mode:3;
+      unsigned int s_wrap_mode:3;
+      unsigned int pad:1;
+      unsigned int non_normalized_coord:1;
+      unsigned int trilinear_quality:2;
+      unsigned int address_round:6;
+      unsigned int max_aniso:3;
+      unsigned int chroma_key_mode:1;
+      unsigned int chroma_key_index:2;
+      unsigned int chroma_key_enable:1;
+      unsigned int pad0:6;
+   } ss3;
+};
+
 #endif /* _I965_STRUCTS_H_ */