Create batchbuffer for VME via GPU shader on Sandybridge
author Xiang, Haihao <haihao.xiang@intel.com>
Tue, 14 Feb 2012 01:41:16 +0000 (09:41 +0800)
committer Xiang, Haihao <haihao.xiang@intel.com>
Tue, 14 Feb 2012 01:41:16 +0000 (09:41 +0800)
It is the same as commits a4ff1bd and a4ff1bd

Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
src/gen6_vme.c
src/shaders/vme/Makefile.am
src/shaders/vme/batchbuffer.g6a [new file with mode: 0644]
src/shaders/vme/batchbuffer.g6b [new file with mode: 0644]
src/shaders/vme/gen6_batchbuffer_header.inc [new file with mode: 0644]

index 974fa16..fd80a17 100644 (file)
@@ -68,6 +68,7 @@ static const uint32_t gen6_vme_inter_frame[][4] = {
 };
 
 static const uint32_t gen6_vme_batchbuffer[][4] = {
+#include "shaders/vme/batchbuffer.g6b"
 };
 
 static struct i965_kernel gen6_vme_kernels[] = {
@@ -325,6 +326,62 @@ gen6_vme_output_buffer_setup(VADriverContextP ctx,
     return VA_STATUS_SUCCESS;
 }
 
+static VAStatus
+gen6_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
+                                      struct encode_state *encode_state,
+                                      int index, /* surface-state / binding-table slot to fill */
+                                      struct intel_encoder_context *encoder_context)
+    /* Allocates the "VME batchbuffer" bo and writes a buffer-type surface state for it into slot `index` of surface_state_binding_table.bo. */
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    struct i965_surface_state *ss;
+    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
+    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+    int num_entries;
+    dri_bo *bo;
+    /* One block per macroblock plus one extra block (NOTE(review): presumably room for the batch-end command -- confirm against the shader). */
+    vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
+    vme_context->vme_batchbuffer.size_block = 32; /* 2 OWORDs */
+    vme_context->vme_batchbuffer.pitch = 16;
+    bo = dri_bo_alloc(i965->intel.bufmgr, 
+                      "VME batchbuffer",
+                      vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
+                      0x1000);
+    assert(bo);
+    vme_context->vme_batchbuffer.bo = bo;
+    /* From here on `bo` is the surface-state/binding-table buffer, not the batchbuffer allocated above. */
+    bo = vme_context->surface_state_binding_table.bo;
+    dri_bo_map(bo, 1);
+    assert(bo->virtual);
+    /* Locate the surface state for `index` inside the mapped buffer and clear it before filling it in. */
+    ss = (struct i965_surface_state *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
+    memset(ss, 0, sizeof(*ss));
+    /* Total buffer size expressed in pitch-sized entries. */
+    num_entries = vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block / vme_context->vme_batchbuffer.pitch;
+
+    ss->ss0.render_cache_read_mode = 1;
+    ss->ss0.surface_type = I965_SURFACE_BUFFER;
+    /* Written with the bo's current offset; the relocation emitted below targets this same ss1 field. */
+    ss->ss1.base_addr = vme_context->vme_batchbuffer.bo->offset;
+    /* (num_entries - 1) is split across three fields: bits 0-6 -> width, bits 7-19 -> height, bits 20-26 -> depth. */
+    ss->ss2.width = ((num_entries - 1) & 0x7f);
+    ss->ss2.height = (((num_entries - 1) >> 7) & 0x1fff);
+    ss->ss3.depth = (((num_entries - 1) >> 20) & 0x7f);
+    /* Relocation at the ss1 field of this surface state, pointing at the batchbuffer bo with delta 0. */
+    dri_bo_emit_reloc(bo,
+                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                      0,
+                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
+                      vme_context->vme_batchbuffer.bo);
+    /* Make binding-table entry `index` refer to the surface state written above. */
+    ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+    dri_bo_unmap(bo);
+
+    return VA_STATUS_SUCCESS; /* unconditional: allocation/map failures are only caught by the asserts above */
+}
+
 static VAStatus gen6_vme_surface_setup(VADriverContextP ctx, 
                                        struct encode_state *encode_state,
                                        int is_intra,
@@ -356,6 +413,7 @@ static VAStatus gen6_vme_surface_setup(VADriverContextP ctx,
 
     /* VME output */
     gen6_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context);
+    gen6_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
 
     return VA_STATUS_SUCCESS;
 }
index e9786f3..ba08576 100644 (file)
@@ -1,8 +1,8 @@
 VME_CORE       = batchbuffer.asm intra_frame.asm inter_frame.asm
 
-INTEL_G6B      = intra_frame.g6b inter_frame.g6b
-INTEL_G6A      = intra_frame.g6a inter_frame.g6a
-INTEL_GEN6_INC = gen6_vme_header.inc
+INTEL_G6B      = batchbuffer.g6b intra_frame.g6b inter_frame.g6b
+INTEL_G6A      = batchbuffer.g6a intra_frame.g6a inter_frame.g6a
+INTEL_GEN6_INC = gen6_batchbuffer_header.inc gen6_vme_header.inc
 INTEL_GEN6_ASM = $(INTEL_G6A:%.g6a=%.gen6.asm)
 
 INTEL_G7B      = batchbuffer.g7b intra_frame.g7b inter_frame.g7b
diff --git a/src/shaders/vme/batchbuffer.g6a b/src/shaders/vme/batchbuffer.g6a
new file mode 100644 (file)
index 0000000..4cfa6ef
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Xiang Haihao <haihao.xiang@intel.com>
+ */
+
+include(`gen6_batchbuffer_header.inc')
+include(`batchbuffer.asm')
+
diff --git a/src/shaders/vme/batchbuffer.g6b b/src/shaders/vme/batchbuffer.g6b
new file mode 100644 (file)
index 0000000..26fd464
--- /dev/null
@@ -0,0 +1,62 @@
+   { 0x01000005, 0x22202d29, 0x020000a6, 0x00030003 },
+   { 0x01000005, 0x20a62d29, 0x000000a6, 0xfffcfffc },
+   { 0x00800001, 0x21000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x22000061, 0x00000000, 0x71000006 },
+   { 0x00000001, 0x22040221, 0x000000a3, 0x00000000 },
+   { 0x00000001, 0x221a0129, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x221c0221, 0x000000a2, 0x00000000 },
+   { 0x00000041, 0x21084521, 0x000000a0, 0x000000a5 },
+   { 0x00000040, 0x21084421, 0x00000108, 0x000000a4 },
+   { 0x00000041, 0x21080c21, 0x00000108, 0x00000002 },
+   { 0x00000001, 0x21140231, 0x00000014, 0x00000000 },
+   { 0x00010020, 0x34001c00, 0x00001400, 0x0000003a },
+   { 0x00600001, 0x20000022, 0x008d0100, 0x00000000 },
+   { 0x00000040, 0x21082c21, 0x00000108, 0x00080008 },
+   { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
+   { 0x00600001, 0x20200022, 0x008d0200, 0x00000000 },
+   { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
+   { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
+   { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
+   { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
+   { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
+   { 0x00600001, 0x20400022, 0x008d0200, 0x00000000 },
+   { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
+   { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
+   { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
+   { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
+   { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
+   { 0x00600001, 0x20600022, 0x008d0200, 0x00000000 },
+   { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
+   { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
+   { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
+   { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
+   { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
+   { 0x00600001, 0x20800022, 0x008d0200, 0x00000000 },
+   { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
+   { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
+   { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
+   { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
+   { 0x05800031, 0x22401cdd, 0x00000000, 0x0a1b0405 },
+   { 0x01000040, 0x20a63dad, 0x000000a6, 0xfffcfffc },
+   { 0x00110020, 0x34001c00, 0x00001400, 0xffffffc6 },
+   { 0x00010020, 0x34001c00, 0x02001400, 0x00000016 },
+   { 0x00600001, 0x20000022, 0x008d0100, 0x00000000 },
+   { 0x00000040, 0x21082c21, 0x00000108, 0x00020002 },
+   { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
+   { 0x00600001, 0x20200022, 0x008d0200, 0x00000000 },
+   { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
+   { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
+   { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
+   { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
+   { 0x05800031, 0x22401cdd, 0x00000000, 0x041b0205 },
+   { 0x01000040, 0x22203dad, 0x02000220, 0xffffffff },
+   { 0x00110020, 0x34001c00, 0x02001400, 0xffffffea },
+   { 0x01000010, 0x20002d28, 0x000000a8, 0x00010001 },
+   { 0x00110020, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00600001, 0x20000022, 0x008d0100, 0x00000000 },
+   { 0x00400001, 0x20200062, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x20240062, 0x00000000, 0x05000000 },
+   { 0x05800031, 0x22401cdd, 0x00000000, 0x041b0005 },
+   { 0x00600001, 0x20000022, 0x008d0000, 0x00000000 },
+   { 0x07800031, 0x24001cc0, 0x00000000, 0x82000010 },
diff --git a/src/shaders/vme/gen6_batchbuffer_header.inc b/src/shaders/vme/gen6_batchbuffer_header.inc
new file mode 100644 (file)
index 0000000..d1bf50b
--- /dev/null
@@ -0,0 +1,141 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Xiang Haihao <haihao.xiang@intel.com>
+ */
+
+define(`BIND_IDX_VME',                  `0')
+define(`BIND_IDX_VME_REF0',             `1')
+define(`BIND_IDX_VME_REF1',             `2')
+define(`BIND_IDX_OUTPUT',               `3')
+define(`BIND_IDX_INEP',                 `4')
+define(`BIND_IDX_VME_BATCHBUFFER',      `5')
+
+define(`OBW_CACHE_TYPE',                `5')
+
+define(`OBW_MESSAGE_TYPE',              `8')
+
+define(`OBW_BIND_IDX',                  `BIND_IDX_VME_BATCHBUFFER')
+
+define(`OBW_CONTROL_0',                 `0')    /* 1 OWord, low 128 bits */
+define(`OBW_CONTROL_1',                 `1')    /* 1 OWord, high 128 bits */
+define(`OBW_CONTROL_2',                 `2')    /* 2 OWords */
+define(`OBW_CONTROL_3',                 `3')    /* 4 OWords */
+define(`OBW_CONTROL_4',                 `4')    /* 8 OWords */
+
+define(`OBW_WRITE_COMMIT_CATEGORY',     `1')    /* write commit on Sandybridge */
+
+define(`OBW_HEADER_PRESENT',            `1')
+
+define(`CMD_MEDIA_OBJECT',              `0x71000006:UD')
+define(`MI_BATCH_BUFFER_END',           `0x05000000:UD')
+
+/* GRF registers
+ * r0 header
+ * r1~r4 constant buffer (reserved)
+ * r5 inline data
+ * r6~r7 reserved      
+ * r8~r15 temporary registers
+ * r16 media object command
+ * r17 
+ * r18 write back of Oword Block Write
+ */
+/*
+ * GRF 0 -- header       
+ */        
+define(`thread_id_ub',          `r0.20<0,1,0>:UB')  /* thread id in payload */
+
+/*
+ * GRF 1~4 -- Constant Buffer (reserved)
+ */
+        
+/*
+ * GRF 5 -- inline data
+ */        
+define(`inline_reg0',           `r5')
+define(`width_in_mb',           `inline_reg0.0') /* the picture width in macroblocks */
+define(`transform_8x8_ub',      `inline_reg0.2') /* transform_8x8 flag */
+define(`mtype_ub',              `inline_reg0.3') /* 0: INTRA, 1: INTER */
+define(`mb_x',                  `inline_reg0.4')
+define(`mb_y',                  `inline_reg0.5')
+define(`mb_xy',                 `inline_reg0.4')
+define(`total_mbs',             `inline_reg0.6') /* the number of macroblock commands 
+                                                  * being processed by the kernel
+                                                  */
+define(`last_object',           `inline_reg0.8') /* the last object flag */
+/*
+ * GRF 8~15 -- temporary registers
+ */
+define(`tmp_reg0',              `r8')
+define(`tmp_reg1',              `r9')
+define(`tmp_reg2',              `r10')
+define(`tmp_reg3',              `r11')
+define(`tmp_reg4',              `r12')
+define(`tmp_reg5',              `r13')
+define(`tmp_reg6',              `r14')
+define(`tmp_reg7',              `r15')
+
+/*
+ * GRF 16
+ */
+define(`media_object_ud',       `r16.0')
+define(`media_object0_ud',      `r16.0')
+define(`media_object1_ud',      `r16.4')
+define(`media_object2_ud',      `r16.8')
+define(`media_object3_ud',      `r16.12')
+define(`media_object4_ud',      `r16.16')
+define(`media_object5_ud',      `r16.20')
+define(`media_object6_ud',      `r16.24')
+define(`media_object6_xy',      `r16.24')
+define(`media_object6_width',   `r16.26')
+define(`media_object7_ud',      `r16.28')
+
+/*
+ * GRF 17
+ */
+define(`remainder_cmds',        `r17.0')
+
+/* 
+ * GRF 18 write back for Oword Block Write message 
+ */
+
+/*
+ * write commit is removed on Ivybridge
+ */
+define(`obw_wb',                `r18')
+define(`obw_wb_length',         `1')
+
+
+/*
+ * Message Payload registers
+ */
+define(`msg_ind',               `0')
+define(`msg_reg0',              `m0')
+define(`msg_reg1',              `m1')
+define(`msg_reg2',              `m2')
+define(`msg_reg3',              `m3')
+define(`msg_reg4',              `m4')
+define(`msg_reg5',              `m5')
+define(`msg_reg6',              `m6')
+define(`msg_reg7',              `m7')
+define(`msg_reg8',              `m8')