i965_drv_video: encode on Ivybridge
author Xiang, Haihao <haihao.xiang@intel.com>
Fri, 10 Jun 2011 02:48:16 +0000 (10:48 +0800)
committer Xiang, Haihao <haihao.xiang@intel.com>
Fri, 10 Jun 2011 03:08:42 +0000 (11:08 +0800)
Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
i965_drv_video/gen6_mfc.c
i965_drv_video/gen6_vme.c
i965_drv_video/i965_defines.h
i965_drv_video/i965_drv_video.c
i965_drv_video/i965_structs.h
i965_drv_video/intel_batchbuffer.c
i965_drv_video/intel_driver.h

index 45e10fe..3514402 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2010 Intel Corporation
+ * Copyright © 2010-2011 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
@@ -43,7 +43,7 @@ gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen
 {
     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
 
-    BEGIN_BCS_BATCH(batch,4);
+    BEGIN_BCS_BATCH(batch, 4);
 
     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
     OUT_BCS_BATCH(batch,
@@ -71,6 +71,42 @@ gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen
 }
 
 static void
+gen7_mfc_pipe_mode_select(VADriverContextP ctx,
+                          int standard_select,
+                          struct gen6_encoder_context *gen6_encoder_context)
+{
+    /*
+     * Emit the 5-dword MFX_PIPE_MODE_SELECT command into the encoder's BCS
+     * batch (Ivybridge variant of gen6_mfc_pipe_mode_select).
+     *
+     * standard_select is written into the low bits of the first payload
+     * dword; only MFX_FORMAT_MPEG2 and MFX_FORMAT_AVC are accepted (asserted).
+     * The command selects long format, encoding mode, post-deblocking output
+     * on and stream-out off; the last two dwords are zero.
+     * ctx is not used here.
+     */
+    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+
+    assert(standard_select == MFX_FORMAT_MPEG2 ||
+           standard_select == MFX_FORMAT_AVC);
+
+    BEGIN_BCS_BATCH(batch, 5);
+    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
+    OUT_BCS_BATCH(batch,
+                  (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
+                  (MFD_MODE_VLD << 15) | /* VLD mode */
+                  (0 << 10) | /* disable Stream-Out */
+                  (1 << 9)  | /* Post Deblocking Output */
+                  (0 << 8)  | /* Pre Deblocking Output */
+                  (0 << 5)  | /* not in stitch mode */
+                  (1 << 4)  | /* encoding mode */
+                  (standard_select << 0));  /* standard select: avc or mpeg2 */
+    OUT_BCS_BATCH(batch,
+                  (0 << 7)  | /* expand NOA bus flag */
+                  (0 << 6)  | /* disable slice-level clock gating */
+                  (0 << 5)  | /* disable clock gating for NOA */
+                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
+                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
+                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
+                  (0 << 1)  |
+                  (0 << 0));
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+static void
 gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
 {
     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
@@ -99,6 +135,34 @@ gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_e
 }
 
 static void
+gen7_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+{
+    /*
+     * Emit the 6-dword MFX_SURFACE_STATE command into the encoder's BCS
+     * batch.  Width, height, row pitch (w_pitch) and the chroma plane
+     * Y offset (h_pitch) are all taken from mfc_context->surface_state; the
+     * surface is described as tiled (Y-major) planar 4:2:0 8-bit with
+     * interleaved U/V.  ctx is not used here.
+     */
+    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+
+    BEGIN_BCS_BATCH(batch, 6);
+
+    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
+    OUT_BCS_BATCH(batch, 0);
+    /* height and width are programmed minus one */
+    OUT_BCS_BATCH(batch,
+                  ((mfc_context->surface_state.height - 1) << 18) |
+                  ((mfc_context->surface_state.width - 1) << 4));
+    OUT_BCS_BATCH(batch,
+                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
+                  (0 << 22) | /* surface object control state, FIXME??? */
+                  ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
+                  (0 << 2)  | /* must be 0 for interleave U/V */
+                  (1 << 1)  | /* must be tiled */
+                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
+    OUT_BCS_BATCH(batch,
+                  (0 << 16) |                                                          /* must be 0 for interleave U/V */
+                  (mfc_context->surface_state.h_pitch));               /* y offset for U(cb) */
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+static void
 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
 {
     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
@@ -167,6 +231,31 @@ gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_conte
 }
 
 static void
+gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+{
+    /*
+     * Emit the 11-dword MFX_IND_OBJ_BASE_ADDR_STATE command into the
+     * encoder's BCS batch.  Only the indirect MV object base is given a real
+     * address (a relocation to the VME output buffer); every other base
+     * address is written as 0.  ctx is not used here.
+     */
+    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+
+    BEGIN_BCS_BATCH(batch, 11);
+
+    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    /* MFX Indirect MV Object Base Address */
+    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    /* MFC Indirect PAK-BSE Object Base Address for Encoder (left at 0 here) */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+static void
 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
 {
     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
@@ -238,6 +327,63 @@ gen6_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_e
     ADVANCE_BCS_BATCH(batch);
 }
 
+/*
+ * Emit the 16-dword MFX_AVC_IMG_STATE command into the encoder's BCS batch.
+ *
+ * The picture size in macroblocks is derived from
+ * mfc_context->surface_state.width/height, rounded up to a multiple of 16.
+ * All other fields are fixed constants: frame-MB-only, 4:2:0 chroma, CABAC
+ * entropy coding, no weighted prediction.  ctx is not used here.
+ */
+static void
+gen7_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+
+    /* round up so partially covered macroblocks are counted */
+    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
+    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
+
+    BEGIN_BCS_BATCH(batch, 16);
+    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
+    /* total macroblock count, truncated to 16 bits */
+    OUT_BCS_BATCH(batch,
+                  ((width_in_mbs * height_in_mbs) & 0xFFFF));
+    OUT_BCS_BATCH(batch, 
+                  ((height_in_mbs - 1) << 16) | 
+                  ((width_in_mbs - 1) << 0));
+    OUT_BCS_BATCH(batch, 
+                  (0 << 24) |  /* Second Chroma QP Offset */
+                  (0 << 16) |  /* Chroma QP Offset */
+                  (0 << 14) |   /* Max-bit conformance Intra flag */
+                  (0 << 13) |   /* Max Macroblock size conformance Inter flag */
+                  (0 << 12) |   /* FIXME: Weighted_Pred_Flag */
+                  (0 << 10) |   /* FIXME: Weighted_BiPred_Idc */
+                  (0 << 8)  |   /* FIXME: Image Structure */
+                  (0 << 0) );   /* Current Decoded Image Frame Store ID, reserved in Encode mode */
+    OUT_BCS_BATCH(batch,
+                  (0 << 16) |   /* Minimum Frame size */
+                  (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
+                  (0 << 14) |   /* Load BitStream Pointer only once, 1 slice 1 frame */
+                  (0 << 13) |   /* CABAC 0 word insertion test enable */
+                  (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
+                  (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
+                  (0 << 9)  |   /* FIXME: MbMvFormatFlag */
+                  (1 << 7)  |   /* 0:CAVLC encoding mode,1:CABAC */
+                  (0 << 6)  |   /* Only valid for VLD decoding mode */
+                  (0 << 5)  |   /* Constrained Intra Prediction Flag, from PPS */
+                  (0 << 4)  |   /* Direct 8x8 inference flag */
+                  (0 << 3)  |   /* Only 8x8 IDCT Transform Mode Flag */
+                  (1 << 2)  |   /* Frame MB only flag */
+                  (0 << 1)  |   /* MBAFF mode is in active */
+                  (0 << 0));    /* Field picture flag */
+    OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
+    OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
+                  (0xBB8 << 16) |       /* InterMbMaxSz */
+                  (0xEE8) );            /* IntraMbMaxSz */
+    OUT_BCS_BATCH(batch, 0);            /* Reserved */
+    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
+    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */       
+    /* NOTE(review): the meaning of the next two constants is not documented here -- confirm against the hardware spec */
+    OUT_BCS_BATCH(batch, 0x8C000000);
+    OUT_BCS_BATCH(batch, 0x00010000);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
 
 static void gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
 {
@@ -355,6 +501,82 @@ static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_con
     ADVANCE_BCS_BATCH(batch);  
 }
 
+/*
+ * Emit one 18-dword MFX_QM_STATE command (header, matrix type, 16 payload
+ * dwords) into the encoder's BCS batch.
+ *
+ * qm_type   - matrix selector written into the second dword.
+ * qm        - source matrix, qm_length dwords long.
+ * qm_length - number of valid dwords in qm; must be in [0, 16] (asserted).
+ *
+ * The command always carries 16 payload dwords, so the staging buffer is
+ * zero-initialised: when qm_length < 16 the tail is emitted as zeros rather
+ * than as uninitialised stack contents.  ctx is not used here.
+ */
+static void
+gen7_mfc_qm_state(VADriverContextP ctx,
+                  int qm_type,
+                  unsigned int *qm,
+                  int qm_length,
+                  struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+    unsigned int qm_buffer[16] = { 0 };
+
+    assert(qm_length >= 0 && qm_length <= 16);
+    assert(sizeof(*qm) == 4);
+    memcpy(qm_buffer, qm, qm_length * 4);
+
+    BEGIN_BCS_BATCH(batch, 18);
+    OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
+    OUT_BCS_BATCH(batch, qm_type << 0);
+    intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Program the four AVC matrices (4x4 intra/inter, 8x8 intra/inter) through
+ * gen7_mfc_qm_state() using one shared table in which every byte is 0x10.
+ * The 4x4 types pass 12 dwords of the table, the 8x8 types all 16.
+ */
+static void gen7_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+{
+    unsigned int qm[16] = {
+        0x10101010, 0x10101010, 0x10101010, 0x10101010,
+        0x10101010, 0x10101010, 0x10101010, 0x10101010,
+        0x10101010, 0x10101010, 0x10101010, 0x10101010,
+        0x10101010, 0x10101010, 0x10101010, 0x10101010
+    };
+
+    gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context);
+    gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context);
+    gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context);
+    gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context);
+}
+
+/*
+ * Emit one 34-dword MFX_FQM_STATE command (header, matrix type, 32 payload
+ * dwords) into the encoder's BCS batch.
+ *
+ * fqm_type   - matrix selector written into the second dword.
+ * fqm        - source matrix, fqm_length dwords long.
+ * fqm_length - number of valid dwords in fqm; must be in [0, 32] (asserted).
+ *
+ * The command always carries 32 payload dwords, so the staging buffer is
+ * zero-initialised: when fqm_length < 32 the tail is emitted as zeros rather
+ * than as uninitialised stack contents.  ctx is not used here.
+ */
+static void
+gen7_mfc_fqm_state(VADriverContextP ctx,
+                   int fqm_type,
+                   unsigned int *fqm,
+                   int fqm_length,
+                   struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+    unsigned int fqm_buffer[32] = { 0 };
+
+    assert(fqm_length >= 0 && fqm_length <= 32);
+    assert(sizeof(*fqm) == 4);
+    memcpy(fqm_buffer, fqm, fqm_length * 4);
+
+    BEGIN_BCS_BATCH(batch, 34);
+    OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
+    OUT_BCS_BATCH(batch, fqm_type << 0);
+    intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Program the four AVC FQM matrices (4x4 intra/inter, 8x8 intra/inter)
+ * through gen7_mfc_fqm_state() using one shared table in which every 16-bit
+ * half-word is 0x1000.  The 4x4 types pass 24 dwords of the table, the 8x8
+ * types all 32.  The matrix selectors are the same MFX_QM_AVC_* constants
+ * used for MFX_QM_STATE.
+ */
+static void gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+{
+    unsigned int qm[32] = {
+        0x10001000, 0x10001000, 0x10001000, 0x10001000,
+        0x10001000, 0x10001000, 0x10001000, 0x10001000,
+        0x10001000, 0x10001000, 0x10001000, 0x10001000,
+        0x10001000, 0x10001000, 0x10001000, 0x10001000,
+        0x10001000, 0x10001000, 0x10001000, 0x10001000,
+        0x10001000, 0x10001000, 0x10001000, 0x10001000,
+        0x10001000, 0x10001000, 0x10001000, 0x10001000,
+        0x10001000, 0x10001000, 0x10001000, 0x10001000
+    };
+
+    gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context);
+    gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context);
+    gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context);
+    gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context);
+}
+
 static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
 {
     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
@@ -373,7 +595,6 @@ static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder
     ADVANCE_BCS_BATCH(batch);
 }
        
-       
 static void
 gen6_mfc_avc_insert_object(VADriverContextP ctx, int flush_data, struct gen6_encoder_context *gen6_encoder_context)
 {
@@ -535,6 +756,7 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
                                       struct encode_state *encode_state,
                                       struct gen6_encoder_context *gen6_encoder_context)
 {
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
@@ -561,18 +783,32 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
 
             if (emit_new_state) {
                 intel_batchbuffer_emit_mi_flush(batch);
-                gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context);
-                gen6_mfc_surface_state(ctx, gen6_encoder_context);
+                
+                if (IS_GEN7(i965->intel.device_id)) {
+                    gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context);
+                    gen7_mfc_surface_state(ctx, gen6_encoder_context);
+                    gen7_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
+                } else {
+                    gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context);
+                    gen6_mfc_surface_state(ctx, gen6_encoder_context);
+                    gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
+                }
+
                 gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context);
-                gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
                 gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context);
-                gen6_mfc_avc_img_state(ctx, gen6_encoder_context);
-                gen6_mfc_avc_qm_state(ctx, gen6_encoder_context);
-                gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context);
+
+                if (IS_GEN7(i965->intel.device_id)) {
+                    gen7_mfc_avc_img_state(ctx, gen6_encoder_context);
+                    gen7_mfc_avc_qm_state(ctx, gen6_encoder_context);
+                    gen7_mfc_avc_fqm_state(ctx, gen6_encoder_context);
+                } else {
+                    gen6_mfc_avc_img_state(ctx, gen6_encoder_context);
+                    gen6_mfc_avc_qm_state(ctx, gen6_encoder_context);
+                    gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context);
+                }
+
                 gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context);
-                /*gen6_mfc_avc_directmode_state(ctx);*/
                 gen6_mfc_avc_slice_state(ctx, is_intra, gen6_encoder_context);
-                /*gen6_mfc_avc_insert_object(ctx, 0);*/
                 emit_new_state = 0;
             }
 
index 61ddcc4..09a042f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2009 Intel Corporation
+ * Copyright © 2010-2011 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
 #include "gen6_vme.h"
 #include "i965_encoder.h"
 
-#define SURFACE_STATE_PADDED_SIZE_0     ALIGN(sizeof(struct i965_surface_state), 32)
-#define SURFACE_STATE_PADDED_SIZE_1     ALIGN(sizeof(struct i965_surface_state2), 32)
-#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_0, SURFACE_STATE_PADDED_SIZE_1)
-#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
-#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6)
+/*
+ * Surface-state slot sizing.  Each layout is padded to a 32-byte multiple;
+ * a single slot size (the largest of all gen6 and gen7 layouts) is used so
+ * that SURFACE_STATE_OFFSET() and BINDING_TABLE_OFFSET are the same on both
+ * generations.
+ */
+#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
+#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
+#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)
+
+#define SURFACE_STATE_PADDED_SIZE_0_GEN6        ALIGN(sizeof(struct i965_surface_state), 32)
+#define SURFACE_STATE_PADDED_SIZE_1_GEN6        ALIGN(sizeof(struct i965_surface_state2), 32)
+/* must compare the two GEN6 layouts, otherwise i965_surface_state2 is never accounted for */
+#define SURFACE_STATE_PADDED_SIZE_GEN6          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6)
+
+#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
+/* byte offset of surface-state slot `index`; argument parenthesised so expressions are safe */
+#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
+/* the binding table starts right after the last surface-state slot */
+#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6)
 
 #define VME_INTRA_SHADER       0       
 #define VME_INTER_SHADER       1
   
 static const uint32_t gen6_vme_intra_frame[][4] = {
 #include "shaders/vme/intra_frame.g6b"
-    {0,0,0,0}
 };
 
 static const uint32_t gen6_vme_inter_frame[][4] = {
 #include "shaders/vme/inter_frame.g6b"
-    {0,0,0,0}
 };
 
 static struct i965_kernel gen6_vme_kernels[] = {
@@ -80,6 +84,31 @@ static struct i965_kernel gen6_vme_kernels[] = {
     }
 };
 
+/* Ivybridge VME intra-frame kernel binary, pulled in from the generated .g7b file. */
+static const uint32_t gen7_vme_intra_frame[][4] = {
+#include "shaders/vme/intra_frame.g7b"
+};
+
+/* Ivybridge VME inter-frame kernel binary, pulled in from the generated .g7b file. */
+static const uint32_t gen7_vme_inter_frame[][4] = {
+#include "shaders/vme/inter_frame.g7b"
+};
+
+/*
+ * Kernel table copied into vme_context->vme_kernels on gen7 devices.
+ * Each entry lists a display name, the VME_*_SHADER index, the kernel binary
+ * and its size in bytes.  The trailing NULL is presumably the kernel's
+ * buffer object, filled in when the kernel is loaded -- confirm against
+ * struct i965_kernel.
+ */
+static struct i965_kernel gen7_vme_kernels[] = {
+    {
+        "VME Intra Frame",
+        VME_INTRA_SHADER,                                                                              /* index */
+        gen7_vme_intra_frame,                  
+        sizeof(gen7_vme_intra_frame),          
+        NULL
+    },
+    {
+        "VME inter Frame",
+        VME_INTER_SHADER,
+        gen7_vme_inter_frame,
+        sizeof(gen7_vme_inter_frame),
+        NULL
+    }
+};
+
 static void
 gen6_vme_set_common_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
 {
@@ -308,6 +337,246 @@ static VAStatus gen6_vme_surface_setup(VADriverContextP ctx,
     return VA_STATUS_SUCCESS;
 }
 
+/*
+ * Surface state for IvyBridge
+ */
+/*
+ * Translate an I915_TILING_* mode into the tiled_surface / tile_walk fields
+ * of dword 0 of a gen7 surface state.  Any other tiling value leaves the
+ * fields untouched.
+ */
+static void
+gen7_vme_set_common_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
+{
+    switch (tiling) {
+    case I915_TILING_NONE:
+        ss->ss0.tiled_surface = 0;
+        ss->ss0.tile_walk = 0;
+        break;
+    case I915_TILING_X:
+        ss->ss0.tiled_surface = 1;
+        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+        break;
+    case I915_TILING_Y:
+        ss->ss0.tiled_surface = 1;
+        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+        break;
+    }
+}
+
+/*
+ * Same mapping as gen7_vme_set_common_surface_tiling(), but for the
+ * gen7_surface_state2 layout, where the tiling fields live in dword 2.
+ * Any other tiling value leaves the fields untouched.
+ */
+static void
+gen7_vme_set_source_surface_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
+{
+    switch (tiling) {
+    case I915_TILING_NONE:
+        ss->ss2.tiled_surface = 0;
+        ss->ss2.tile_walk = 0;
+        break;
+    case I915_TILING_X:
+        ss->ss2.tiled_surface = 1;
+        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
+        break;
+    case I915_TILING_Y:
+        ss->ss2.tiled_surface = 1;
+        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
+        break;
+    }
+}
+
+/*
+ * Only used for VME source surface state.
+ *
+ * Fills surface-state slot `index` of the shared surface-state/binding-table
+ * buffer with a gen7_surface_state2 describing obj_surface as a planar 4:2:0
+ * 8-bit surface with interleaved chroma, emits a relocation for its base
+ * address, and points binding-table entry `index` at the slot.
+ * obj_surface->bo must already exist (asserted).  ctx is not used here.
+ */
+static void gen7_vme_source_surface_state(VADriverContextP ctx,
+                                          int index,
+                                          struct object_surface *obj_surface,
+                                          struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    struct gen7_surface_state2 *ss;
+    dri_bo *bo;
+    int w, h, w_pitch, h_pitch;
+    unsigned int tiling, swizzle;
+
+    assert(obj_surface->bo);
+
+    /* orig_* give the picture size, width/height the pitches used below */
+    w = obj_surface->orig_width;
+    h = obj_surface->orig_height;
+    w_pitch = obj_surface->width;
+    h_pitch = obj_surface->height;
+
+    bo = vme_context->surface_state_binding_table.bo;
+    dri_bo_map(bo, 1);
+    assert(bo->virtual);
+
+    ss = (struct gen7_surface_state2 *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
+    memset(ss, 0, sizeof(*ss));
+
+    /* presumed address; the relocation emitted below covers this dword */
+    ss->ss0.surface_base_address = obj_surface->bo->offset;
+
+    ss->ss1.cbcr_pixel_offset_v_direction = 2;
+    ss->ss1.width = w - 1;
+    ss->ss1.height = h - 1;
+
+    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
+    ss->ss2.interleave_chroma = 1;
+    ss->ss2.pitch = w_pitch - 1;
+    ss->ss2.half_pitch_for_chroma = 0;
+
+    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
+    gen7_vme_set_source_surface_tiling(ss, tiling);
+
+    /* UV offset for interleave mode */
+    ss->ss3.x_offset_for_cb = 0;
+    ss->ss3.y_offset_for_cb = h_pitch;
+
+    dri_bo_emit_reloc(bo,
+                      I915_GEM_DOMAIN_RENDER, 0,
+                      0,
+                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
+                      obj_surface->bo);
+
+    /* binding-table entry `index` holds the byte offset of this slot */
+    ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+    dri_bo_unmap(bo);
+}
+
+/*
+ * Fills surface-state slot `index` of the shared surface-state/binding-table
+ * buffer with a gen7_surface_state describing the Y plane of obj_surface as
+ * a 2D R8_UNORM surface, emits a relocation for its base address, and points
+ * binding-table entry `index` at the slot.  ctx is not used here.
+ */
+static void
+gen7_vme_media_source_surface_state(VADriverContextP ctx,
+                                    int index,
+                                    struct object_surface *obj_surface,
+                                    struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    struct gen7_surface_state *ss;
+    dri_bo *bo;
+    int w, h, w_pitch;
+    unsigned int tiling, swizzle;
+
+    /* Y plane */
+    w = obj_surface->orig_width;
+    h = obj_surface->orig_height;
+    w_pitch = obj_surface->width;
+
+    bo = vme_context->surface_state_binding_table.bo;
+    dri_bo_map(bo, True);
+    assert(bo->virtual);
+
+    ss = (struct gen7_surface_state *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
+    memset(ss, 0, sizeof(*ss));
+
+    ss->ss0.surface_type = I965_SURFACE_2D;
+    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
+
+    /* presumed address; the relocation emitted below covers this dword */
+    ss->ss1.base_addr = obj_surface->bo->offset;
+
+    /* NOTE(review): width is programmed as w / 4 although the format is R8 -- confirm the intended unit with the kernel */
+    ss->ss2.width = w / 4 - 1;
+    ss->ss2.height = h - 1;
+
+    ss->ss3.pitch = w_pitch - 1;
+
+    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
+    gen7_vme_set_common_surface_tiling(ss, tiling);
+
+    dri_bo_emit_reloc(bo,
+                      I915_GEM_DOMAIN_RENDER, 0,
+                      0,
+                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
+                      obj_surface->bo);
+
+    /* binding-table entry `index` holds the byte offset of this slot */
+    ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+    dri_bo_unmap(bo);
+}
+
+/*
+ * Allocate the VME output buffer for the current frame and describe it with
+ * a buffer-type gen7_surface_state in slot `index` of the shared
+ * surface-state/binding-table buffer.
+ *
+ * The buffer holds one 16-byte block per macroblock for intra slices and
+ * four per macroblock otherwise; the macroblock counts come from the H.264
+ * sequence parameters and the intra flag from the first slice parameter
+ * buffer.  The new buffer object is stored in vme_context->vme_output.bo.
+ *
+ * Always returns VA_STATUS_SUCCESS; allocation and mapping failures are only
+ * caught by assert().
+ */
+static VAStatus
+gen7_vme_output_buffer_setup(VADriverContextP ctx,
+                             struct encode_state *encode_state,
+                             int index,
+                             struct gen6_encoder_context *gen6_encoder_context)
+
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    struct gen7_surface_state *ss;
+    dri_bo *bo;
+    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
+    VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
+    int is_intra = pSliceParameter->slice_flags.bits.is_intra;
+    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
+    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+    int num_entries;
+
+    if ( is_intra ) {
+        vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
+    } else {
+        vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs * 4;
+    }
+    vme_context->vme_output.size_block = 16; /* an OWORD */
+    vme_context->vme_output.pitch = ALIGN(vme_context->vme_output.size_block, 16);
+    bo = dri_bo_alloc(i965->intel.bufmgr, 
+                      "VME output buffer",
+                      vme_context->vme_output.num_blocks * vme_context->vme_output.pitch,
+                      0x1000);
+    assert(bo);
+    vme_context->vme_output.bo = bo;
+
+    bo = vme_context->surface_state_binding_table.bo;
+    dri_bo_map(bo, 1);
+    assert(bo->virtual);
+
+    /*
+     * Write the state into slot `index`, the same slot that the relocation
+     * and the binding-table entry below refer to.
+     */
+    ss = (struct gen7_surface_state *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
+    memset(ss, 0, sizeof(*ss));
+
+    /* the buffer is addressed in 16-byte entries */
+    num_entries = vme_context->vme_output.num_blocks * vme_context->vme_output.pitch / 16;
+
+    ss->ss0.surface_type = I965_SURFACE_BUFFER;
+
+    ss->ss1.base_addr = vme_context->vme_output.bo->offset;
+
+    /* (num_entries - 1) is split across width (7 bits), height (14 bits) and depth (6 bits) */
+    ss->ss2.width = ((num_entries - 1) & 0x7f);
+    ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
+    ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
+
+    ss->ss3.pitch = vme_context->vme_output.pitch - 1;
+
+    dri_bo_emit_reloc(bo,
+                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                      0,
+                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
+                      vme_context->vme_output.bo);
+
+    /* binding-table entry `index` holds the byte offset of this slot */
+    ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+    dri_bo_unmap(bo);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Set up every surface the gen7 VME kernels use for one frame:
+ *   slot 0 - VME output buffer,
+ *   slot 1 - current picture (VME source layout),
+ *   slot 2 - reference picture 0, only when !is_intra,
+ *   slot 4 - current picture Y plane (media surface layout).
+ * Slot 3 (reference 1) is not programmed yet.  Surfaces are looked up from
+ * encode_state and the H.264 picture parameters; a missing surface is only
+ * caught by assert().  Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus gen7_vme_surface_setup(VADriverContextP ctx, 
+                                       struct encode_state *encode_state,
+                                       int is_intra,
+                                       struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface;
+    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
+
+    /*Setup surfaces state*/
+    /* current picture for encoding */
+    obj_surface = SURFACE(encode_state->current_render_target);
+    assert(obj_surface);
+    gen7_vme_source_surface_state(ctx, 1, obj_surface, gen6_encoder_context);
+    gen7_vme_media_source_surface_state(ctx, 4, obj_surface, gen6_encoder_context);
+
+    if ( ! is_intra ) {
+        /* reference 0 */
+        obj_surface = SURFACE(pPicParameter->reference_picture);
+        assert(obj_surface);
+        gen7_vme_source_surface_state(ctx, 2, obj_surface, gen6_encoder_context);
+        /* reference 1, FIXME: */
+        // obj_surface = SURFACE(pPicParameter->reference_picture);
+        // assert(obj_surface);
+        //gen7_vme_source_surface_state(ctx, 3, obj_surface);
+    }
+
+    /* VME output */
+    gen7_vme_output_buffer_setup(ctx, encode_state, 0, gen6_encoder_context);
+
+    return VA_STATUS_SUCCESS;
+}
+
 static VAStatus gen6_vme_interface_setup(VADriverContextP ctx, 
                                          struct encode_state *encode_state,
                                          struct gen6_encoder_context *gen6_encoder_context)
@@ -627,12 +896,17 @@ static VAStatus gen6_vme_prepare(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct gen6_encoder_context *gen6_encoder_context)
 {
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
     VAStatus vaStatus = VA_STATUS_SUCCESS;
     VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
     int is_intra = pSliceParameter->slice_flags.bits.is_intra;
        
     /*Setup all the memory object*/
-    gen6_vme_surface_setup(ctx, encode_state, is_intra, gen6_encoder_context);
+    if (IS_GEN7(i965->intel.device_id))
+        gen7_vme_surface_setup(ctx, encode_state, is_intra, gen6_encoder_context);
+    else
+        gen6_vme_surface_setup(ctx, encode_state, is_intra, gen6_encoder_context);
+
     gen6_vme_interface_setup(ctx, encode_state, gen6_encoder_context);
     gen6_vme_constant_setup(ctx, encode_state, gen6_encoder_context);
     gen6_vme_vme_state_setup(ctx, encode_state, is_intra, gen6_encoder_context);
@@ -679,7 +953,10 @@ Bool gen6_vme_context_init(VADriverContextP ctx, struct gen6_vme_context *vme_co
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     int i;
 
-    memcpy(vme_context->vme_kernels, gen6_vme_kernels, sizeof(vme_context->vme_kernels));
+    if (IS_GEN7(i965->intel.device_id))
+        memcpy(vme_context->vme_kernels, gen7_vme_kernels, sizeof(vme_context->vme_kernels));
+    else
+        memcpy(vme_context->vme_kernels, gen6_vme_kernels, sizeof(vme_context->vme_kernels));
 
     for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
         /*Load kernel into GPU memory*/        
index 66b0b5d..a14e111 100644 (file)
 #define MFX_AES_STATE                           MFX(2, 0, 0, 5)
 #define MFX_STATE_POINTER                       MFX(2, 0, 0, 6)
 #define MFX_QM_STATE                            MFX(2, 0, 0, 7)
+#define MFX_FQM_STATE                           MFX(2, 0, 0, 8)
 
 #define MFX_WAIT                                MFX(1, 0, 0, 0)
 
index f6ae6a4..c5a0267 100644 (file)
@@ -158,7 +158,7 @@ static struct hw_codec_info gen6_hw_codec_info = {
 extern struct hw_context *gen7_dec_hw_context_init(VADriverContextP, VAProfile);
 static struct hw_codec_info gen7_hw_codec_info = {
     .dec_hw_context_init = gen7_dec_hw_context_init,
-    .enc_hw_context_init = NULL,
+    .enc_hw_context_init = gen6_enc_hw_context_init,
 };
 
 VAStatus 
index df59e45..12a8d14 100644 (file)
@@ -1252,4 +1252,56 @@ struct gen7_sampler_state
    } ss3;
 };
 
+/*
+ * Eight-dword surface state layout filled in by the gen7 VME source-surface
+ * setup code.  Each ssN member is one 32-bit dword; the bit-field widths in
+ * ss1..ss4 each add up to 32.
+ */
+struct gen7_surface_state2
+{
+    /* dword 0: surface base address (patched through a relocation) */
+    struct {
+        unsigned int surface_base_address;
+    } ss0;
+
+    /* dword 1: picture dimensions */
+    struct {
+        unsigned int cbcr_pixel_offset_v_direction:2;
+        unsigned int picture_structure:2;
+        unsigned int width:14;
+        unsigned int height:14;
+    } ss1;
+
+    /* dword 2: tiling, pitch and format */
+    struct {
+        unsigned int tile_walk:1;
+        unsigned int tiled_surface:1;
+        unsigned int half_pitch_for_chroma:1;
+        unsigned int pitch:18;
+        unsigned int pad0:1;
+        unsigned int surface_object_control_data:4;
+        unsigned int pad1:1;
+        unsigned int interleave_chroma:1;
+        unsigned int surface_format:4;
+    } ss2;
+
+    /* dword 3: Cb plane offsets */
+    struct {
+        unsigned int y_offset_for_cb:15;
+        unsigned int pad0:1;
+        unsigned int x_offset_for_cb:14;
+        unsigned int pad1:2;
+    } ss3;
+
+    /* dword 4: Cr plane offsets */
+    struct {
+        unsigned int y_offset_for_cr:15;
+        unsigned int pad0:1;
+        unsigned int x_offset_for_cr:14;
+        unsigned int pad1:2;
+    } ss4;
+
+    /* dwords 5-7: padding only */
+    struct {
+        unsigned int pad0;
+    } ss5;
+
+    struct {
+        unsigned int pad0;
+    } ss6;
+
+    struct {
+        unsigned int pad0;
+    } ss7;
+};
+
 #endif /* _I965_STRUCTS_H_ */
index f52bde4..f310793 100644 (file)
@@ -166,10 +166,19 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
         if (batch->flag == I915_EXEC_RENDER) {
             BEGIN_BATCH(batch, 4);
             OUT_BATCH(batch, CMD_PIPE_CONTROL | 0x2);
-            OUT_BATCH(batch, 
-                      CMD_PIPE_CONTROL_WC_FLUSH |
-                      CMD_PIPE_CONTROL_TC_FLUSH |
-                      CMD_PIPE_CONTROL_NOWRITE);
+
+            if (IS_GEN6(intel->device_id))
+                OUT_BATCH(batch, 
+                          CMD_PIPE_CONTROL_WC_FLUSH |
+                          CMD_PIPE_CONTROL_TC_FLUSH |
+                          CMD_PIPE_CONTROL_NOWRITE);
+            else
+                OUT_BATCH(batch, 
+                          CMD_PIPE_CONTROL_WC_FLUSH |
+                          CMD_PIPE_CONTROL_TC_FLUSH |
+                          CMD_PIPE_CONTROL_DC_FLUSH |
+                          CMD_PIPE_CONTROL_NOWRITE);
+
             OUT_BATCH(batch, 0);
             OUT_BATCH(batch, 0);
             ADVANCE_BATCH(batch);
index a031935..e31360d 100644 (file)
@@ -50,6 +50,7 @@
 #define CMD_PIPE_CONTROL_IS_FLUSH               (1 << 11)
 #define CMD_PIPE_CONTROL_TC_FLUSH               (1 << 10)
 #define CMD_PIPE_CONTROL_NOTIFY_ENABLE          (1 << 8)
+#define CMD_PIPE_CONTROL_DC_FLUSH               (1 << 5)
 #define CMD_PIPE_CONTROL_GLOBAL_GTT             (1 << 2)
 #define CMD_PIPE_CONTROL_LOCAL_PGTT             (0 << 2)
 #define CMD_PIPE_CONTROL_DEPTH_CACHE_FLUSH      (1 << 0)