int transform_8x8_mode_flag,
struct intel_encoder_context *encoder_context)
{
- struct intel_batchbuffer *batch = encoder_context->base.batch;
- int mb_x, mb_y, i;
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
int total_mbs = mb_width * mb_height;
- int number_mb_cmds = 512;
- int starting_mb = 0;
- int last_object = 0;
+ int number_mb_cmds = 128;
+ int mb_x = 0, mb_y = 0;
+ int i, count = 0;
+ unsigned int *command_ptr;
- for (i = 0; i < total_mbs / number_mb_cmds; i++) {
- mb_x = starting_mb % mb_width;
- mb_y = starting_mb / mb_width;
- last_object = (total_mbs - starting_mb) == number_mb_cmds;
- starting_mb += number_mb_cmds;
+ dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
+ command_ptr = vme_context->vme_batchbuffer.bo->virtual;
- BEGIN_BATCH(batch, 9);
-
- OUT_BATCH(batch, CMD_MEDIA_OBJECT | (9 - 2));
- OUT_BATCH(batch, VME_BATCHBUFFER);
- OUT_BATCH(batch, 0);
- OUT_BATCH(batch, 0);
- OUT_BATCH(batch, 0);
- OUT_BATCH(batch, 0);
+ for (i = 0; i < total_mbs / number_mb_cmds; i++) {
+ mb_x = count % mb_width;
+ mb_y = count / mb_width;
+
+ *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+ *command_ptr++ = kernel;
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
/*inline data */
- OUT_BATCH(batch,
- kernel << 24 |
- transform_8x8_mode_flag << 16 |
- mb_width);
- OUT_BATCH(batch,
- number_mb_cmds << 16 |
- mb_y << 8 |
- mb_x);
- OUT_BATCH(batch, last_object);
-
- ADVANCE_BATCH(batch);
- }
+ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
+ *command_ptr++ = (number_mb_cmds << 16 | transform_8x8_mode_flag);
- if (!last_object) {
- number_mb_cmds = total_mbs % number_mb_cmds;
- mb_x = starting_mb % mb_width;
- mb_y = starting_mb / mb_width;
- last_object = 1;
- starting_mb += number_mb_cmds;
+ count += number_mb_cmds;
+ }
- BEGIN_BATCH(batch, 9);
+ number_mb_cmds = total_mbs - count;
- OUT_BATCH(batch, CMD_MEDIA_OBJECT | (9 - 2));
- OUT_BATCH(batch, VME_BATCHBUFFER);
- OUT_BATCH(batch, 0);
- OUT_BATCH(batch, 0);
- OUT_BATCH(batch, 0);
- OUT_BATCH(batch, 0);
+ if (number_mb_cmds) {
+ mb_x = count % mb_width;
+ mb_y = count / mb_width;
+
+ *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+ *command_ptr++ = kernel;
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
/*inline data */
- OUT_BATCH(batch,
- kernel << 24 |
- transform_8x8_mode_flag << 16 |
- mb_width);
- OUT_BATCH(batch,
- number_mb_cmds << 16 |
- mb_y << 8 |
- mb_x);
- OUT_BATCH(batch, last_object);
-
- ADVANCE_BATCH(batch);
+ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
+ *command_ptr++ = (number_mb_cmds << 16 | transform_8x8_mode_flag);
}
+ *command_ptr++ = 0;
+ *command_ptr++ = MI_BATCH_BUFFER_END;
+
+ dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
static void gen6_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
- intel_batchbuffer_start_atomic(batch, 0x1000);
-
- gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
gen6_vme_fill_vme_batchbuffer(ctx,
encode_state,
width_in_mbs, height_in_mbs,
is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
encoder_context);
+
+ intel_batchbuffer_start_atomic(batch, 0x1000);
gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
BEGIN_BATCH(batch, 2);
OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
INTEL_G6B = batchbuffer.g6b intra_frame.g6b inter_frame.g6b
INTEL_G6A = batchbuffer.g6a intra_frame.g6a inter_frame.g6a
-INTEL_GEN6_INC = gen6_batchbuffer_header.inc gen6_vme_header.inc
+INTEL_GEN6_INC = batchbuffer.inc vme.inc
INTEL_GEN6_ASM = $(INTEL_G6A:%.g6a=%.gen6.asm)
INTEL_G7B = batchbuffer.g7b intra_frame.g7b inter_frame.g7b
INTEL_G7A = batchbuffer.g7a intra_frame.g7a inter_frame.g7a
-INTEL_GEN7_INC = gen7_batchbuffer_header.inc gen7_vme_header.inc
+INTEL_GEN7_INC = batchbuffer.inc vme.inc
INTEL_GEN7_ASM = $(INTEL_G7A:%.g7a=%.gen7.asm)
TARGETS =
/*
* __START
*/
-__INTER_START:
- and.z.f0.1 (1) remainder_cmds<1>:uw total_mbs<0,1,0>:uw 0x0003:uw {align1};
- and.z.f0.0 (1) total_mbs<1>:uw total_mbs<0,1,0>:uw 0xfffc:uw {align1};
-
+__START:
mov (16) tmp_reg0<1>:ud 0x0:ud {align1} ;
+ mov (16) tmp_reg2<1>:ud 0x0:ud {align1} ;
+ mov (1) obw_header.20<1>:ub thread_id_ub {align1}; /* dispatch id */
mov (8) media_object_ud<1>:ud 0x0:ud {align1} ;
mov (1) media_object0_ud<1>:ud CMD_MEDIA_OBJECT {align1} ;
mov (1) media_object1_ud<1>:ud mtype_ub<0,1,0>ub {align1};
mov (1) media_object6_width<1>:uw width_in_mb<0,1,0>:uw {align1};
- mov (1) media_object7_ud<1>:ud transform_8x8_ub<0,1,0>ub {align1};
+ mov (1) media_object7_flag<1>:uw transform_8x8_ub<0,1,0>ub {align1};
+ mov (1) media_object7_num_mbs<1>:uw NUM_MACROBLOCKS_PER_COMMAND:uw {align1} ;
- mul (1) tmp_reg0.8<1>:ud width_in_mb<0,1,0>:uw mb_y<0,1,0>:ub {align1};
- add (1) tmp_reg0.8<1>:ud tmp_reg0.8<0,1,0>:ud mb_x<0,1,0>:ub {align1};
- mul (1) tmp_reg0.8<1>:ud tmp_reg0.8<0,1,0>:ud 0x2:ud {align1} ;
- mov (1) tmp_reg0.20<1>:ub thread_id_ub {align1}; /* dispatch id */
-
- (f0.0)jmpi (1) __REMAINDER ;
+ mov (1) width_per_row<1>:ud width_in_mb<0,1,0>:uw {align1} ;
+ and.z.f0.1 (1) remainder_cmds<1>:ud total_mbs<0,1,0>:ud (NUM_MACROBLOCKS_PER_COMMAND - 1):ud {align1} ;
+ and.z.f0.0 (1) total_mbs<1>:ud total_mbs<0,1,0>:ud -NUM_MACROBLOCKS_PER_COMMAND:ud {align1} ;
-__CMD_LOOP:
- mov (8) msg_reg0.0<1>:ud tmp_reg0<8,8,1>:ud {align1};
- add (1) tmp_reg0.8<1>:ud tmp_reg0.8<0,1,0>:ud 8:uw {align1} ;
+ (f0.0)jmpi (1) __REMAINDER ;
- mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ;
+__CMD_LOOP:
+ mov (8) msg_reg0.0<1>:ud obw_header<8,8,1>:ud {align1};
mov (8) msg_reg1<1>:ud media_object_ud<8,8,1>:ud {align1};
- add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1};
- cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1};
- (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ;
- (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ;
- mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ;
- mov (8) msg_reg2<1>:ud media_object_ud<8,8,1>:ud {align1};
- add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1};
- cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1};
- (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ;
- (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ;
-
- mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ;
- mov (8) msg_reg3<1>:ud media_object_ud<8,8,1>:ud {align1};
- add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1};
- cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1};
- (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ;
- (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ;
-
- mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ;
- mov (8) msg_reg4<1>:ud media_object_ud<8,8,1>:ud {align1};
- add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1};
- cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1};
- (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ;
- (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ;
-
-/* bind index 5, write 8 oword, msg type: 8(OWord Block Write) */
+/* bind index 5, write 2 oword, msg type: 8(OWord Block Write) */
send (16)
msg_ind
obw_wb
data_port(
OBW_CACHE_TYPE,
OBW_MESSAGE_TYPE,
- OBW_CONTROL_4,
+ OBW_CONTROL_2,
OBW_BIND_IDX,
OBW_WRITE_COMMIT_CATEGORY,
OBW_HEADER_PRESENT
)
- mlen 5
+ mlen 2
rlen obw_wb_length
{align1};
+
+ /* (x, y) of the first macroblock */
+ add (1) count<1>:ud count<0,1,0>:ud NUM_MACROBLOCKS_PER_COMMAND:uw {align1} ;
+ math (1) quotient<1>:ud count<0,1,0>:ud width_per_row<0,1,0>:ud intdivmod {align1} ;
+ shl (1) quotient<1>:ud quotient<0,1,0>:ud 8:uw {align1} ;
+ add (1) quotient<1>:ud quotient<0,1,0>:ud remainder<0,1,0>:ud {align1} ;
+ mov (1) media_object6_xy<1>:uw quotient<0,1,0>:uw {align1} ;
-
- add.z.f0.0 (1) total_mbs<1>:w total_mbs<0,1,0>:w -4:w {align1};
+ /* the new offset */
+ add (1) obw_header.8<1>:ud obw_header.8<0,1,0>:ud 2:uw {align1} ;
+
+ add.z.f0.0 (1) total_mbs<1>:w total_mbs<0,1,0>:w -NUM_MACROBLOCKS_PER_COMMAND:w {align1} ;
(-f0.0)jmpi (1) __CMD_LOOP ;
-
+
__REMAINDER:
- (f0.1)jmpi (1) __DONE ;
+ (f0.1)jmpi (1) __DONE ;
-__REMAINDER_LOOP:
- mov (8) msg_reg0.0<1>:ud tmp_reg0<8,8,1>:ud {align1} ;
- add (1) tmp_reg0.8<1>:ud tmp_reg0.8<0,1,0>:ud 2:uw {align1} ;
-
- mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ;
+ mov (1) media_object7_num_mbs<1>:uw remainder_cmds<0,1,0>:uw {align1} ;
+ mov (8) msg_reg0.0<1>:ud obw_header<8,8,1>:ud {align1};
mov (8) msg_reg1<1>:ud media_object_ud<8,8,1>:ud {align1};
- add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1};
- cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1};
- (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ;
- (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ;
-
-/* bind index 5, write 2 oword, msg type: 8(OWord Block Write) */
+
send (16)
msg_ind
obw_wb
mlen 2
rlen obw_wb_length
{align1};
-
- add.z.f0.1 (1) remainder_cmds<1>:w remainder_cmds<0,1,0>:w -1:w;
- (-f0.1)jmpi (1) __REMAINDER_LOOP ;
+
+ /* the new offset */
+ add (1) obw_header.8<1>:ud obw_header.8<0,1,0>:ud 2:uw {align1} ;
__DONE:
- cmp.e.f0.0 (1) null<1>:uw last_object<0,1,0>:uw 1:uw {align1};
- (-f0.0)jmpi (1) __EXIT ;
-
/* bind index 5, write 1 oword, msg type: 8(OWord Block Write) */
- mov (8) msg_reg0.0<1>:ud tmp_reg0<8,8,1>:ud {align1} ;
+ mov (8) msg_reg0.0<1>:ud obw_header<8,8,1>:ud {align1} ;
mov (4) msg_reg1.0<1>:ud 0x0:ud {align1} ;
mov (1) msg_reg1.4<1>:ud MI_BATCH_BUFFER_END {align1} ;
* Xiang Haihao <haihao.xiang@intel.com>
*/
-#include "gen6_batchbuffer_header.inc"
+#include "batchbuffer.inc"
#include "batchbuffer.asm"
- { 0x01000005, 0x22202d29, 0x020000a6, 0x00030003 },
- { 0x01000005, 0x20a62d29, 0x000000a6, 0xfffcfffc },
{ 0x00800001, 0x21000061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x21400061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x21140231, 0x00000014, 0x00000000 },
{ 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
{ 0x00000001, 0x22000061, 0x00000000, 0x71000006 },
- { 0x00000001, 0x22040221, 0x000000a3, 0x00000000 },
- { 0x00000001, 0x221a0129, 0x000000a0, 0x00000000 },
- { 0x00000001, 0x221c0221, 0x000000a2, 0x00000000 },
- { 0x00000041, 0x21084521, 0x000000a0, 0x000000a5 },
- { 0x00000040, 0x21084421, 0x00000108, 0x000000a4 },
- { 0x00000041, 0x21080c21, 0x00000108, 0x00000002 },
- { 0x00000001, 0x21140231, 0x00000014, 0x00000000 },
- { 0x00010020, 0x34001c00, 0x00001400, 0x0000003a },
+ { 0x00000001, 0x22040221, 0x000000a5, 0x00000000 },
+ { 0x00000001, 0x221a0129, 0x000000a6, 0x00000000 },
+ { 0x00000001, 0x221c0229, 0x000000a4, 0x00000000 },
+ { 0x00000001, 0x221e0169, 0x00000000, 0x02000200 },
+ { 0x00000001, 0x21280121, 0x000000a6, 0x00000000 },
+ { 0x01000005, 0x21240c21, 0x020000a0, 0x000001ff },
+ { 0x01000005, 0x20a00c21, 0x000000a0, 0xfffffe00 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000016 },
{ 0x00600001, 0x20000022, 0x008d0100, 0x00000000 },
- { 0x00000040, 0x21082c21, 0x00000108, 0x00080008 },
- { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0200, 0x00000000 },
- { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
- { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
- { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
- { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
- { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
- { 0x00600001, 0x20400022, 0x008d0200, 0x00000000 },
- { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
- { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
- { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
- { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
- { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
- { 0x00600001, 0x20600022, 0x008d0200, 0x00000000 },
- { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
- { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
- { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
- { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
- { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
- { 0x00600001, 0x20800022, 0x008d0200, 0x00000000 },
- { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
- { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
- { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
- { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
- { 0x05800031, 0x22401cdd, 0x00000000, 0x0a1b0405 },
- { 0x01000040, 0x20a63dad, 0x000000a6, 0xfffcfffc },
- { 0x00110020, 0x34001c00, 0x00001400, 0xffffffc6 },
- { 0x00010020, 0x34001c00, 0x02001400, 0x00000016 },
- { 0x00600001, 0x20000022, 0x008d0100, 0x00000000 },
+ { 0x05800031, 0x22401cdd, 0x00000000, 0x041b0205 },
+ { 0x00000040, 0x21202c21, 0x00000120, 0x02000200 },
+ { 0x0b000038, 0x21400421, 0x00000120, 0x00000128 },
+ { 0x00000009, 0x21402c21, 0x00000140, 0x00080008 },
+ { 0x00000040, 0x21400421, 0x00000140, 0x00000160 },
+ { 0x00000001, 0x22180129, 0x00000140, 0x00000000 },
{ 0x00000040, 0x21082c21, 0x00000108, 0x00020002 },
- { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
+ { 0x01000040, 0x20a03dad, 0x000000a0, 0xfe00fe00 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0xffffffea },
+ { 0x00010020, 0x34001c00, 0x02001400, 0x0000000a },
+ { 0x00000001, 0x221e0129, 0x00000124, 0x00000000 },
+ { 0x00600001, 0x20000022, 0x008d0100, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0200, 0x00000000 },
- { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
- { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
- { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
- { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
{ 0x05800031, 0x22401cdd, 0x00000000, 0x041b0205 },
- { 0x01000040, 0x22203dad, 0x02000220, 0xffffffff },
- { 0x00110020, 0x34001c00, 0x02001400, 0xffffffea },
- { 0x01000010, 0x20002d28, 0x000000a8, 0x00010001 },
- { 0x00110020, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00000040, 0x21082c21, 0x00000108, 0x00020002 },
{ 0x00600001, 0x20000022, 0x008d0100, 0x00000000 },
{ 0x00400001, 0x20200062, 0x00000000, 0x00000000 },
{ 0x00000001, 0x20240062, 0x00000000, 0x05000000 },
* Xiang Haihao <haihao.xiang@intel.com>
*/
-#include "gen7_batchbuffer_header.inc"
+#include "batchbuffer.inc"
#include "batchbuffer.asm"
- { 0x01000005, 0x22202d29, 0x020000a6, 0x00030003 },
- { 0x01000005, 0x20a62d29, 0x000000a6, 0xfffcfffc },
{ 0x00800001, 0x21000061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x21400061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x21140231, 0x00000014, 0x00000000 },
{ 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
{ 0x00000001, 0x22000061, 0x00000000, 0x71000006 },
- { 0x00000001, 0x22040221, 0x000000a3, 0x00000000 },
- { 0x00000001, 0x221a0129, 0x000000a0, 0x00000000 },
- { 0x00000001, 0x221c0221, 0x000000a2, 0x00000000 },
- { 0x00000041, 0x21084521, 0x000000a0, 0x000000a5 },
- { 0x00000040, 0x21084421, 0x00000108, 0x000000a4 },
- { 0x00000041, 0x21080c21, 0x00000108, 0x00000002 },
- { 0x00000001, 0x21140231, 0x00000014, 0x00000000 },
- { 0x00010020, 0x34001c00, 0x00001400, 0x0000003a },
+ { 0x00000001, 0x22040221, 0x000000a5, 0x00000000 },
+ { 0x00000001, 0x221a0129, 0x000000a6, 0x00000000 },
+ { 0x00000001, 0x221c0229, 0x000000a4, 0x00000000 },
+ { 0x00000001, 0x221e0169, 0x00000000, 0x02000200 },
+ { 0x00000001, 0x21280121, 0x000000a6, 0x00000000 },
+ { 0x01000005, 0x21240c21, 0x020000a0, 0x000001ff },
+ { 0x01000005, 0x20a00c21, 0x000000a0, 0xfffffe00 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000016 },
{ 0x00600001, 0x28000021, 0x008d0100, 0x00000000 },
- { 0x00000040, 0x21082c21, 0x00000108, 0x00080008 },
- { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
{ 0x00600001, 0x28200021, 0x008d0200, 0x00000000 },
- { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
- { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
- { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
- { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
- { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
- { 0x00600001, 0x28400021, 0x008d0200, 0x00000000 },
- { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
- { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
- { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
- { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
- { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
- { 0x00600001, 0x28600021, 0x008d0200, 0x00000000 },
- { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
- { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
- { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
- { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
- { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
- { 0x00600001, 0x28800021, 0x008d0200, 0x00000000 },
- { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
- { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
- { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
- { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
- { 0x0a800031, 0x20001cac, 0x00000800, 0x0a0a0405 },
- { 0x01000040, 0x20a63dad, 0x000000a6, 0xfffcfffc },
- { 0x00110020, 0x34001c00, 0x00001400, 0xffffffc6 },
- { 0x00010020, 0x34001c00, 0x02001400, 0x00000016 },
- { 0x00600001, 0x28000021, 0x008d0100, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0205 },
+ { 0x00000040, 0x21202c21, 0x00000120, 0x02000200 },
+ { 0x0b000038, 0x21400421, 0x00000120, 0x00000128 },
+ { 0x00000009, 0x21402c21, 0x00000140, 0x00080008 },
+ { 0x00000040, 0x21400421, 0x00000140, 0x00000160 },
+ { 0x00000001, 0x22180129, 0x00000140, 0x00000000 },
{ 0x00000040, 0x21082c21, 0x00000108, 0x00020002 },
- { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
+ { 0x01000040, 0x20a03dad, 0x000000a0, 0xfe00fe00 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0xffffffea },
+ { 0x00010020, 0x34001c00, 0x02001400, 0x0000000a },
+ { 0x00000001, 0x221e0129, 0x00000124, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0100, 0x00000000 },
{ 0x00600001, 0x28200021, 0x008d0200, 0x00000000 },
- { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
- { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
- { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
- { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
{ 0x0a800031, 0x20001cac, 0x00000800, 0x040a0205 },
- { 0x01000040, 0x22203dad, 0x02000220, 0xffffffff },
- { 0x00110020, 0x34001c00, 0x02001400, 0xffffffea },
- { 0x01000010, 0x20002d28, 0x000000a8, 0x00010001 },
- { 0x00110020, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00000040, 0x21082c21, 0x00000108, 0x00020002 },
{ 0x00600001, 0x28000021, 0x008d0100, 0x00000000 },
{ 0x00400001, 0x28200061, 0x00000000, 0x00000000 },
{ 0x00000001, 0x28240061, 0x00000000, 0x05000000 },
define(`BIND_IDX_INEP', `4')
define(`BIND_IDX_VME_BATCHBUFFER', `5')
+#ifdef DEV_SNB
+
define(`OBW_CACHE_TYPE', `5')
+#else
+
+define(`OBW_CACHE_TYPE', `10')
+
+#endif
+
define(`OBW_MESSAGE_TYPE', `8')
define(`OBW_BIND_IDX', `BIND_IDX_VME_BATCHBUFFER')
define(`OBW_CONTROL_3', `3') /* 4 OWords */
define(`OBW_CONTROL_4', `4') /* 8 OWords */
+#ifdef DEV_SNB
+
define(`OBW_WRITE_COMMIT_CATEGORY', `1') /* write commit on Sandybridge */
+#else
+
+define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */
+
+#endif
+
define(`OBW_HEADER_PRESENT', `1')
define(`CMD_MEDIA_OBJECT', `0x71000006:UD')
define(`MI_BATCH_BUFFER_END', `0x05000000:UD')
+define(`NUM_MACROBLOCKS_PER_COMMAND', `512')
+
/* GRF registers
* r0 header
* r1~r4 constant buffer (reserved)
* GRF 5 -- inline data
*/
define(`inline_reg0', `r5')
-define(`width_in_mb', `inline_reg0.0') /* the picture width in macroblocks */
-define(`transform_8x8_ub', `inline_reg0.2') /* transform_8x8 flag */
-define(`mtype_ub', `inline_reg0.3') /* 0: INTRA, 1: INTER */
-define(`mb_x', `inline_reg0.4')
-define(`mb_y', `inline_reg0.5')
-define(`mb_xy', `inline_reg0.4')
-define(`total_mbs', `inline_reg0.6') /* the number of macroblock commands
- * being processed by the kernel
- */
-define(`last_object', `inline_reg0.8') /* the last object flag */
+define(`total_mbs', `inline_reg0.0') /* the number of macroblocks in a picture */
+define(`transform_8x8_ub', `inline_reg0.4') /* transform_8x8 flag */
+define(`mtype_ub', `inline_reg0.5') /* 0: INTRA, 1: INTER */
+define(`width_in_mb', `inline_reg0.6') /* the picture width in macroblocks */
+
/*
* GRF 8~15 -- temporary registers
*/
define(`tmp_reg0', `r8')
+define(`obw_header', `tmp_reg0')
define(`tmp_reg1', `r9')
+define(`count', `tmp_reg1.0')
+define(`remainder_cmds', `tmp_reg1.4')
+define(`width_per_row', `tmp_reg1.8')
define(`tmp_reg2', `r10')
+define(`quotient', `tmp_reg2')
define(`tmp_reg3', `r11')
+define(`remainder', `tmp_reg3')
define(`tmp_reg4', `r12')
define(`tmp_reg5', `r13')
define(`tmp_reg6', `r14')
define(`media_object5_ud', `r16.20')
define(`media_object6_ud', `r16.24')
define(`media_object6_xy', `r16.24')
+define(`media_object6_x', `r16.24')
+define(`media_object6_y', `r16.25')
define(`media_object6_width', `r16.26')
define(`media_object7_ud', `r16.28')
-
-/*
- * GRF 17
- */
-define(`remainder_cmds', `r17.0')
+define(`media_object7_flag', `r16.28')
+define(`media_object7_num_mbs', `r16.30')
/*
* GRF 18 write back for Oword Block Write message
*/
+#ifdef DEV_SNB
+
+define(`obw_wb', `r18')
+define(`obw_wb_length', `1')
+
+#else
+
/*
* write commit is removed on Ivybridge
*/
-define(`obw_wb', `r18')
-define(`obw_wb_length', `1')
+define(`obw_wb', `null<1>:W')
+define(`obw_wb_length', `0')
+#endif
/*
* Message Payload registers
*/
+#ifdef DEV_SNB
+
define(`msg_ind', `0')
define(`msg_reg0', `m0')
define(`msg_reg1', `m1')
define(`msg_reg6', `m6')
define(`msg_reg7', `m7')
define(`msg_reg8', `m8')
+
+#else
+
+define(`msg_ind', `64')
+define(`msg_reg0', `g64')
+define(`msg_reg1', `g65')
+define(`msg_reg2', `g66')
+define(`msg_reg3', `g67')
+define(`msg_reg4', `g68')
+define(`msg_reg5', `g69')
+define(`msg_reg6', `g70')
+define(`msg_reg7', `g71')
+define(`msg_reg8', `g72')
+
+#endif
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Xiang Haihao <haihao.xiang@intel.com>
- */
-
-define(`BIND_IDX_OUTPUT', `0')
-define(`BIND_IDX_VME', `1')
-define(`BIND_IDX_VME_REF0', `2')
-define(`BIND_IDX_VME_REF1', `3')
-define(`BIND_IDX_INEP', `4')
-define(`BIND_IDX_VME_BATCHBUFFER', `5')
-
-define(`OBW_CACHE_TYPE', `10')
-
-define(`OBW_MESSAGE_TYPE', `8')
-
-define(`OBW_BIND_IDX', `BIND_IDX_VME_BATCHBUFFER')
-
-define(`OBW_CONTROL_0', `0') /* 1 OWord, low 128 bits */
-define(`OBW_CONTROL_1', `1') /* 1 OWord, high 128 bits */
-define(`OBW_CONTROL_2', `2') /* 2 OWords */
-define(`OBW_CONTROL_3', `3') /* 4 OWords */
-define(`OBW_CONTROL_4', `4') /* 8 OWords */
-
-define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */
-
-define(`OBW_HEADER_PRESENT', `1')
-
-define(`CMD_MEDIA_OBJECT', `0x71000006:UD')
-define(`MI_BATCH_BUFFER_END', `0x05000000:UD')
-
-/* GRF registers
- * r0 header
- * r1~r4 constant buffer (reserved)
- * r5 inline data
- * r6~r7 reserved
- * r8~r15 temporary registers
- * r16 write back of Oword Block Write
- */
-/*
- * GRF 0 -- header
- */
-define(`thread_id_ub', `r0.20<0,1,0>:UB') /* thread id in payload */
-
-/*
- * GRF 1~4 -- Constant Buffer (reserved)
- */
-
-/*
- * GRF 5 -- inline data
- */
-define(`inline_reg0', `r5')
-define(`width_in_mb', `inline_reg0.0') /* the picture width in macroblocks */
-define(`transform_8x8_ub', `inline_reg0.2') /* transform_8x8 flag */
-define(`mtype_ub', `inline_reg0.3') /* 0: INTRA, 1: INTER */
-define(`mb_x', `inline_reg0.4')
-define(`mb_y', `inline_reg0.5')
-define(`mb_xy', `inline_reg0.4')
-define(`total_mbs', `inline_reg0.6') /* the number of macroblock commands
- * being processed by the kernel
- */
-define(`last_object', `inline_reg0.8') /* the last object flag */
-/*
- * GRF 8~15 -- temporary registers
- */
-define(`tmp_reg0', `r8')
-define(`tmp_reg1', `r9')
-define(`tmp_reg2', `r10')
-define(`tmp_reg3', `r11')
-define(`tmp_reg4', `r12')
-define(`tmp_reg5', `r13')
-define(`tmp_reg6', `r14')
-define(`tmp_reg7', `r15')
-
-/*
- * GRF 16
- */
-define(`media_object_ud', `r16.0')
-define(`media_object0_ud', `r16.0')
-define(`media_object1_ud', `r16.4')
-define(`media_object2_ud', `r16.8')
-define(`media_object3_ud', `r16.12')
-define(`media_object4_ud', `r16.16')
-define(`media_object5_ud', `r16.20')
-define(`media_object6_ud', `r16.24')
-define(`media_object6_xy', `r16.24')
-define(`media_object6_width', `r16.26')
-define(`media_object7_ud', `r16.28')
-
-/*
- * GRF 17
- */
-define(`remainder_cmds', `r17.0')
-
-/*
- * GRF 16 write back for Oword Block Write message
- */
-
-/*
- * write commit is removed on Ivybridge
- */
-define(`obw_wb', `null<1>:W')
-define(`obw_wb_length', `0')
-
-
-/*
- * Message Payload registers
- */
-define(`msg_ind', `64')
-define(`msg_reg0', `g64')
-define(`msg_reg1', `g65')
-define(`msg_reg2', `g66')
-define(`msg_reg3', `g67')
-define(`msg_reg4', `g68')
-define(`msg_reg5', `g69')
-define(`msg_reg6', `g70')
-define(`msg_reg7', `g71')
-define(`msg_reg8', `g72')
+++ /dev/null
-/*
- * Copyright © <2010>, Intel Corporation.
- *
- * This program is licensed under the terms and conditions of the
- * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
- * http://www.opensource.org/licenses/eclipse-1.0.php.
- *
- */
-// Modual name: ME_header.inc
-//
-// Global symbols define
-//
-
-/*
- * Constant
- */
-define(`VME_MESSAGE_TYPE_INTER', `1')
-define(`VME_MESSAGE_TYPE_INTRA', `2')
-define(`VME_MESSAGE_TYPE_MIXED', `3')
-
-define(`BLOCK_32X1', `0x0000001F')
-define(`BLOCK_4X16', `0x000F0003')
-
-define(`LUMA_INTRA_16x16_DISABLE', `0x1')
-define(`LUMA_INTRA_8x8_DISABLE', `0x2')
-define(`LUMA_INTRA_4x4_DISABLE', `0x4')
-
-define(`INTRA_PRED_AVAIL_FLAG_AE', `0x60')
-define(`INTRA_PRED_AVAIL_FLAG_B', `0x10')
-define(`INTRA_PRED_AVAIL_FLAG_C', `0x8')
-define(`INTRA_PRED_AVAIL_FLAG_D', `0x4')
-
-define(`BIND_IDX_VME', `0')
-define(`BIND_IDX_VME_REF0', `1')
-define(`BIND_IDX_VME_REF1', `2')
-define(`BIND_IDX_OUTPUT', `3')
-define(`BIND_IDX_INEP', `4')
-
-define(`SUB_PEL_MODE_INTEGER', `0x00000000')
-define(`SUB_PEL_MODE_HALF', `0x00001000')
-define(`SUB_PEL_MODE_QUARTER', `0x00003000')
-
-define(`INTER_SAD_NONE', `0x00000000')
-define(`INTER_SAD_HAAR', `0x00200000')
-
-define(`INTRA_SAD_NONE', `0x00000000')
-define(`INTRA_SAD_HAAR', `0x00800000')
-
-define(`INTER_PART_MASK', `0x7E000000')
-
-define(`REF_REGION_SIZE', `0x2830:UW')
-
-define(`BI_SUB_MB_PART_MASK', `0x0c000000')
-define(`MAX_NUM_MV', `0x00000020')
-define(`SEARCH_PATH_LEN', `0x00003F3F')
-
-define(`INTRA_PREDICTORE_MODE', `0x11111111:UD')
-
-define(`OBW_CACHE_TYPE', `10')
-
-define(`OBW_MESSAGE_TYPE', `8')
-
-define(`OBW_BIND_IDX', `BIND_IDX_OUTPUT')
-
-define(`OBW_CONTROL_0', `0') /* 1 OWord, low 128 bits */
-define(`OBW_CONTROL_1', `1') /* 1 OWord, high 128 bits */
-define(`OBW_CONTROL_2', `2') /* 2 OWords */
-define(`OBW_CONTROL_3', `3') /* 4 OWords */
-
-define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */
-
-define(`OBW_HEADER_PRESENT', `1')
-
-/* GRF registers
- * r0 header
- * r1~r4 constant buffer (reserved)
- * r5 inline data
- * r6~r11 reserved
- * r12 write back of VME message
- * r13 write back of Oword Block Write
- */
-/*
- * GRF 0 -- header
- */
-define(`thread_id_ub', `r0.20<0,1,0>:UB') /* thread id in payload */
-
-/*
- * GRF 1~4 -- Constant Buffer (reserved)
- */
-
-/*
- * GRF 5 -- inline data
- */
-define(`inline_reg0', `r5')
-define(`w_in_mb_uw', `inline_reg0.2')
-define(`orig_xy_ub', `inline_reg0.0')
-define(`orig_x_ub', `inline_reg0.0') /* in macroblock */
-define(`orig_y_ub', `inline_reg0.1')
-define(`transform_8x8_ub', `inline_reg0.4')
-
-/*
- * GRF 6~11 -- reserved
- */
-
-/*
- * GRF 12~15 -- write back for VME message
- */
-define(`vme_wb', `r12')
-define(`vme_wb0', `r12')
-define(`vme_wb1', `r13')
-define(`vme_wb2', `r14')
-define(`vme_wb3', `r15')
-
-/*
- * GRF 16 -- reserved
- */
-/*
- * write commit is removed on Ivybridge
- */
-define(`obw_wb', `null<1>:W')
-define(`obw_wb_length', `0')
-/*
- * GRF 18~21 -- Intra Neighbor Edge Pixels
- */
-define(`INEP_ROW', `r18')
-define(`INEP_COL0', `r20')
-define(`INEP_COL1', `r21')
-
-/*
- * temporary registers
- */
-define(`tmp_reg0', `r32')
-define(`tmp_reg1', `r33')
-define(`intra_part_mask_ub', `tmp_reg1.28')
-define(`mb_intra_struct_ub', `tmp_reg1.29')
-define(`tmp_reg2', `r34')
-define(`tmp_x_w', `tmp_reg2.0')
-define(`tmp_reg3', `r35')
-
-/*
- * Message Payload registers
- */
-define(`msg_ind', `64')
-define(`msg_reg0', `g64')
-define(`msg_reg1', `g65')
-define(`msg_reg2', `g66')
-define(`msg_reg3', `g67')
-define(`msg_reg4', `g68')
-
-/*
- * VME message payload
- */
-define(`vme_msg_length', `5')
-define(`vme_intra_wb_length', `1')
-define(`vme_inter_wb_length', `6')
-define(`vme_msg_ind', `msg_ind')
-define(`vme_msg_0', `msg_reg0')
-define(`vme_msg_1', `msg_reg1')
-define(`vme_msg_2', `msg_reg2')
-define(`vme_msg_3', `msg_reg3')
-define(`vme_msg_4', `msg_reg4')
-
-
-
-
__INTER_START:
mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1};
mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1};
+mov (16) tmp_reg3.0<1>:UD 0x0:UD {align1};
-/*
- * VME message
- */
-/* m0 */
-mul (2) tmp_reg0.8<1>:UW orig_xy_ub<2,2,1>:UB 16:UW {align1}; /* Source = (x, y) * 16 */
+shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* Source = (x, y) * 16 */
#ifdef DEV_SNB
-mul (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB 16:UW {align1};
-add (1) tmp_reg0.0<1>:W tmp_reg0.0<2,2,1>:W -16:W {align1}; /* Reference = (x-16,y-12)-(x+32,y+24) */
-add (1) tmp_reg0.2<1>:W tmp_reg0.2<2,2,1>:W -12:W {align1};
+shl (2) vme_m0.0<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1};
+add (1) vme_m0.0<1>:W vme_m0.0<2,2,1>:W -16:W {align1}; /* Reference = (x-16,y-12)-(x+32,y+24) */
+add (1) vme_m0.2<1>:W vme_m0.2<2,2,1>:W -12:W {align1};
#else
-mov (1) tmp_reg0.0<1>:W -16:W {align1} ; /* Reference = (x-16,y-12)-(x+32,y+24) */
-mov (1) tmp_reg0.2<1>:W -12:W {align1} ;
+mov (1) vme_m0.0<1>:W -16:W {align1} ; /* Reference = (x-16,y-12)-(x+32,y+24) */
+mov (1) vme_m0.2<1>:W -12:W {align1} ;
#endif
-mov (1) tmp_reg0.12<1>:UD INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER:UD {align1}; /* 16x16 Source, 1/4 pixel, harr */
-
+mov (1) vme_m0.12<1>:UD INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER:UD {align1}; /* 16x16 Source, 1/4 pixel, haar */
+mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */
+
+mov (1) vme_m1.4<1>:UD MAX_NUM_MV:UD {align1}; /* Default value MAX 32 MVs */
+mov (1) vme_m1.8<1>:UD SEARCH_PATH_LEN:UD {align1};
-mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
-mov (1) tmp_reg0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */
-mov (8) vme_msg_0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1};
+mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
+shl (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x2:UD {align1};
+mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/*
+ * VME message
+ */
+/* m0 */
+__VME_LOOP:
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
/* m1 */
-mov (1) tmp_reg1.4<1>:UD MAX_NUM_MV:UD {align1}; /* Default value MAX 32 MVs */
-mov (1) tmp_reg1.8<1>:UD SEARCH_PATH_LEN:UD {align1};
-
-mov (8) vme_msg_1<1>:UD tmp_reg1.0<8,8,1>:UD {align1};
+mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1};
/* m2 */
mov (8) vme_msg_2<1>:UD 0x0:UD {align1};
mlen vme_msg_length
rlen vme_inter_wb_length
{align1};
-
/*
* Oword Block Write message
*/
-mul (1) tmp_reg3.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
-add (1) tmp_reg3.8<1>:UD tmp_reg3.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
-mul (1) tmp_reg3.8<1>:UD tmp_reg3.8<0,1,0>:UD 0x4:UD {align1};
-mov (1) tmp_reg3.20<1>:UB thread_id_ub {align1}; /* dispatch id */
-mov (8) msg_reg0.0<1>:UD tmp_reg3.0<8,8,1>:UD {align1};
+mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1};
#ifdef DEV_SNB
-mov (2) tmp_reg3.0<1>:UW vme_wb1.0<2,2,1>:UB {align1};
-add (1) tmp_reg3.0<1>:W tmp_reg3.0<2,2,1>:W -64:W {align1};
-add (1) tmp_reg3.2<1>:W tmp_reg3.2<2,2,1>:W -48:W {align1};
+mov (2) obw_m1.0<1>:UW vme_wb1.0<2,2,1>:UB {align1};
+add (1) obw_m1.0<1>:W obw_m1.0<2,2,1>:W -64:W {align1};
+add (1) obw_m1.2<1>:W obw_m1.2<2,2,1>:W -48:W {align1};
#else
-mov (2) tmp_reg3.0<1>:UW vme_wb1.0<2,2,1>:B {align1};
+mov (2) obw_m1.0<1>:UW vme_wb1.0<2,2,1>:B {align1};
#endif
-mov (8) msg_reg1.0<1>:UD tmp_reg3.0<8,8,0>:UD {align1};
+mov (8) msg_reg1.0<1>:UD obw_m1.0<8,8,0>:UD {align1};
-mov (8) msg_reg2.0<1>:UD tmp_reg3.0<8,8,0>:UD {align1};
+mov (8) msg_reg2.0<1>:UD obw_m1.0<8,8,0>:UD {align1};
/* bind index 3, write 4 oword, msg type: 8(OWord Block Write) */
send (16)
mlen 3
rlen obw_wb_length
{align1};
+
+add (1) orig_x_ub<1>:ub orig_x_ub<0,1,0>:ub 1:uw {align1} ; /* step to the next macroblock column */
+add (1) vme_m0.8<1>:UW vme_m0.8<0,1,0>:UW 16:UW {align1}; /* macroblock X in pixels += 16 */
+#ifdef DEV_SNB
+add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W 16:W {align1}; /* reference window X += 16 */
+#endif
+
+cmp.e.f0.0 (1) null<1>:uw w_in_mb_uw<0,1,0>:uw orig_x_ub<0,1,0>:ub {align1}; /* f0.0 is set when the column index equals the width in macroblocks */
+/* Row wrap, only when f0.0 is set: continue at (0, y + 1) */
+(f0.0)mov (1) orig_x_ub<1>:ub 0:uw {align1} ; /* column index back to 0 */
+(f0.0)mov (1) vme_m0.8<1>:uw 0:uw {align1} ; /* macroblock X in pixels = 0 */
+(f0.0)add (1) vme_m0.10<1>:uw vme_m0.10<0,1,0>:uw 16:uw {align1} ; /* macroblock Y in pixels += 16 */
+#ifdef DEV_SNB
+(f0.0)mov (1) vme_m0.0<1>:w -16:W {align1}; /* Reference = (x-16,y-12)-(x+32,y+24), so X restarts at -16 for column 0 */
+(f0.0)add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 16:w {align1}; /* reference window Y += 16 */
+#endif
+
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 4:UW {align1} ; /* offset += 4 */
+
+add.z.f0.1 (1) num_macroblocks<1>:w num_macroblocks<0,1,0>:w -1:w {align1} ; /* one macroblock done; f0.1 is set when the count reaches zero */
+(-f0.1)jmpi (1) __VME_LOOP ; /* loop again while f0.1 is clear */
+
+__EXIT:
/*
* kill thread
-#include "gen6_vme_header.inc"
+#include "vme.inc"
#include "inter_frame.asm"
{ 0x00800001, 0x24000061, 0x00000000, 0x00000000 },
{ 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
- { 0x00200041, 0x24082e29, 0x004500a0, 0x00100010 },
- { 0x00200041, 0x24002e29, 0x004500a0, 0x00100010 },
- { 0x00000040, 0x24003dad, 0x00450400, 0xfff0fff0 },
- { 0x00000040, 0x24023dad, 0x00450402, 0xfff4fff4 },
- { 0x00000001, 0x240c0061, 0x00000000, 0x7e203000 },
- { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
- { 0x00000001, 0x24160169, 0x00000000, 0x28302830 },
- { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 },
- { 0x00000001, 0x24240061, 0x00000000, 0x00000020 },
- { 0x00000001, 0x24280061, 0x00000000, 0x00003f3f },
- { 0x00600001, 0x20200022, 0x008d0420, 0x00000000 },
+ { 0x00800001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
+ { 0x00200009, 0x24402e29, 0x004500a0, 0x00040004 },
+ { 0x00000040, 0x24403dad, 0x00450440, 0xfff0fff0 },
+ { 0x00000040, 0x24423dad, 0x00450442, 0xfff4fff4 },
+ { 0x00000001, 0x244c0061, 0x00000000, 0x7e203000 },
+ { 0x00000001, 0x24540231, 0x00000014, 0x00000000 },
+ { 0x00000001, 0x24560169, 0x00000000, 0x28302830 },
+ { 0x00000001, 0x24640061, 0x00000000, 0x00000020 },
+ { 0x00000001, 0x24680061, 0x00000000, 0x00003f3f },
+ { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 },
+ { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 },
+ { 0x00000009, 0x24880c21, 0x00000488, 0x00000002 },
+ { 0x00000001, 0x24940231, 0x00000014, 0x00000000 },
+ { 0x00600001, 0x20000022, 0x008d0440, 0x00000000 },
+ { 0x00600001, 0x20200022, 0x008d0460, 0x00000000 },
{ 0x00600001, 0x20400062, 0x00000000, 0x00000000 },
{ 0x00600001, 0x20400062, 0x00000000, 0x00000000 },
{ 0x00600001, 0x20600062, 0x00000000, 0x00000000 },
{ 0x08600031, 0x21801cdd, 0x00000000, 0x08482000 },
- { 0x00000041, 0x24684521, 0x000000a2, 0x000000a1 },
- { 0x00000040, 0x24684421, 0x00000468, 0x000000a0 },
- { 0x00000041, 0x24680c21, 0x00000468, 0x00000004 },
- { 0x00000001, 0x24740231, 0x00000014, 0x00000000 },
- { 0x00600001, 0x20000022, 0x008d0460, 0x00000000 },
- { 0x00200001, 0x24600229, 0x004501a0, 0x00000000 },
- { 0x00000040, 0x24603dad, 0x00450460, 0xffc0ffc0 },
- { 0x00000040, 0x24623dad, 0x00450462, 0xffd0ffd0 },
- { 0x00600001, 0x20200022, 0x008c0460, 0x00000000 },
- { 0x00600001, 0x20400022, 0x008c0460, 0x00000000 },
+ { 0x00600001, 0x20000022, 0x008d0480, 0x00000000 },
+ { 0x00200001, 0x24a00229, 0x004501a0, 0x00000000 },
+ { 0x00000040, 0x24a03dad, 0x004504a0, 0xffc0ffc0 },
+ { 0x00000040, 0x24a23dad, 0x004504a2, 0xffd0ffd0 },
+ { 0x00600001, 0x20200022, 0x008c04a0, 0x00000000 },
+ { 0x00600001, 0x20400022, 0x008c04a0, 0x00000000 },
{ 0x05800031, 0x22001cdd, 0x00000000, 0x061b0303 },
+ { 0x00000040, 0x20a02e31, 0x000000a0, 0x00010001 },
+ { 0x00000040, 0x24482d29, 0x00000448, 0x00100010 },
+ { 0x00000040, 0x24403dad, 0x00000440, 0x00100010 },
+ { 0x01000010, 0x20004528, 0x000000a2, 0x000000a0 },
+ { 0x00010001, 0x20a00171, 0x00000000, 0x00000000 },
+ { 0x00010001, 0x24480169, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 },
+ { 0x00010001, 0x244001ed, 0x00000000, 0xfff0fff0 },
+ { 0x00010040, 0x24423dad, 0x00000442, 0x00100010 },
+ { 0x00000040, 0x24882c21, 0x00000488, 0x00040004 },
+ { 0x01000040, 0x20a63dad, 0x020000a6, 0xffffffff },
+ { 0x00110020, 0x34001c00, 0x02001400, 0xffffffce },
{ 0x00600001, 0x20000022, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x24001cc8, 0x00000000, 0x82000010 },
-#include "gen7_vme_header.inc"
+#include "vme.inc"
#include "inter_frame.asm"
{ 0x00800001, 0x24000061, 0x00000000, 0x00000000 },
{ 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
- { 0x00200041, 0x24082e29, 0x004500a0, 0x00100010 },
- { 0x00000001, 0x240001ed, 0x00000000, 0xfff0fff0 },
- { 0x00000001, 0x240201ed, 0x00000000, 0xfff4fff4 },
- { 0x00000001, 0x240c0061, 0x00000000, 0x7e203000 },
- { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
- { 0x00000001, 0x24160169, 0x00000000, 0x28302830 },
- { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 },
- { 0x00000001, 0x24240061, 0x00000000, 0x00000020 },
- { 0x00000001, 0x24280061, 0x00000000, 0x00003f3f },
- { 0x00600001, 0x28200021, 0x008d0420, 0x00000000 },
+ { 0x00800001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
+ { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 },
+ { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 },
+ { 0x00000001, 0x244c0061, 0x00000000, 0x7e203000 },
+ { 0x00000001, 0x24540231, 0x00000014, 0x00000000 },
+ { 0x00000001, 0x24560169, 0x00000000, 0x28302830 },
+ { 0x00000001, 0x24640061, 0x00000000, 0x00000020 },
+ { 0x00000001, 0x24680061, 0x00000000, 0x00003f3f },
+ { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 },
+ { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 },
+ { 0x00000009, 0x24880c21, 0x00000488, 0x00000002 },
+ { 0x00000001, 0x24940231, 0x00000014, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
{ 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
{ 0x08600031, 0x21801cbd, 0x00000800, 0x0a682000 },
- { 0x00000041, 0x24684521, 0x000000a2, 0x000000a1 },
- { 0x00000040, 0x24684421, 0x00000468, 0x000000a0 },
- { 0x00000041, 0x24680c21, 0x00000468, 0x00000004 },
- { 0x00000001, 0x24740231, 0x00000014, 0x00000000 },
- { 0x00600001, 0x28000021, 0x008d0460, 0x00000000 },
- { 0x00200001, 0x246002a9, 0x004501a0, 0x00000000 },
- { 0x00600001, 0x28200021, 0x008c0460, 0x00000000 },
- { 0x00600001, 0x28400021, 0x008c0460, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+ { 0x00200001, 0x24a002a9, 0x004501a0, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008c04a0, 0x00000000 },
+ { 0x00600001, 0x28400021, 0x008c04a0, 0x00000000 },
{ 0x0a800031, 0x20001cac, 0x00000800, 0x060a0303 },
+ { 0x00000040, 0x20a02e31, 0x000000a0, 0x00010001 },
+ { 0x00000040, 0x24482d29, 0x00000448, 0x00100010 },
+ { 0x01000010, 0x20004528, 0x000000a2, 0x000000a0 },
+ { 0x00010001, 0x20a00171, 0x00000000, 0x00000000 },
+ { 0x00010001, 0x24480169, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 },
+ { 0x00000040, 0x24882c21, 0x00000488, 0x00040004 },
+ { 0x01000040, 0x20a63dad, 0x020000a6, 0xffffffff },
+ { 0x00110020, 0x34001c00, 0x02001400, 0xffffffd8 },
{ 0x00600001, 0x28000021, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x24001ca8, 0x00000800, 0x82000010 },
__INTRA_START:
mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1};
mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1};
-
+mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; /* 16 dwords, so this register and the one after it are cleared */
+
+shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */
+add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */
+mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1}; /* block size of the ROW read; assembles to 0x0000001f */
+mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */
+mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1}; /* block size of the COL read; assembles to 0x000f0003 */
+mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; /* width in macroblocks * y */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; /* + x: output offset of the first macroblock */
+mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
/*
* Media Read Message -- fetch neighbor edge pixels
*/
/* ROW */
-mul (2) tmp_reg0.0<1>:D orig_xy_ub<2,2,1>:UB 16:UW {align1}; /* (x, y) * 16 */
-add (1) tmp_reg0.0<1>:D tmp_reg0.0<0,1,0>:D -8:W {align1}; /* X offset */
-add (1) tmp_reg0.4<1>:D tmp_reg0.4<0,1,0>:D -1:W {align1}; /* Y offset */
-mov (1) tmp_reg0.8<1>:UD BLOCK_32X1 {align1};
-mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
-mov (8) msg_reg0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1};
+__INTRA_LOOP:
+mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1};
send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
/* COL */
-mul (2) tmp_reg0.0<1>:D orig_xy_ub<2,2,1>:UB 16:UW {align1}; /* (x, y) * 16 */
-add (1) tmp_reg0.0<1>:D tmp_reg0.0<0,1,0>:D -4:W {align1}; /* X offset */
-mov (1) tmp_reg0.8<1>:UD BLOCK_4X16 {align1};
-mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
-mov (8) msg_reg0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1};
+mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1};
send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
/*
* VME message
*/
/* m0 */
-mul (2) tmp_reg0.8<1>:UW orig_xy_ub<2,2,1>:UB 16:UW {align1}; /* (x, y) * 16 */
-mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
-mov (8) vme_msg_0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1};
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
/* m1 */
+mov (1) intra_flag<1>:UW 0x0:UW {align1} ;
cmp.nz.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1};
(f0.0) mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE + LUMA_INTRA_4x4_DISABLE {align1};
mul.nz.f0.0 (1) null<1>:UD tmp_x_w<0,1,0>:W orig_y_ub<0,1,0>:UB {align1}; /* (width - (X + 1)) * Y != 0 */
(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_C {align1}; /* C */
-mov (8) vme_msg_1<1>:UD tmp_reg1.0<8,8,1>:UD {align1};
+mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1};
/* m2 */
mov (8) vme_msg_2<1>:UD 0x0:UD {align1};
/*
* Oword Block Write message
*/
-mul (1) tmp_reg3.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
-add (1) tmp_reg3.8<1>:UD tmp_reg3.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
-mov (1) tmp_reg3.20<1>:UB thread_id_ub {align1}; /* dispatch id */
-mov (8) msg_reg0.0<1>:UD tmp_reg3<8,8,1>:UD {align1};
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1};
rlen obw_wb_length
{align1};
+add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D 16:W {align1}; /* X offset: X += 16 */
+add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D 16:W {align1}; /* X offset: X += 16 */
+add (1) vme_m0.8<1>:UW vme_m0.8<0,1,0>:UW 16:UW {align1}; /* Y = Y, X += 16 */
+
+add (1) orig_x_ub<1>:ub orig_x_ub<0,1,0>:ub 1:uw {align1} ; /* step to the next macroblock column */
+cmp.e.f0.0 (1) null<1>:uw w_in_mb_uw<0,1,0>:uw orig_x_ub<0,1,0>:ub {align1}; /* f0.0 is set when the column index equals the width in macroblocks */
+(f0.0)mov (1) orig_x_ub<1>:ub 0:uw {align1} ; /* row wrap: column index back to 0 */
+(f0.0)add (1) orig_y_ub<1>:ub orig_y_ub<0,1,0>:ub 1:uw {align1} ; /* row index += 1; it is read again when the intra flags are rebuilt */
+(f0.0)mov (1) read0_header.0<1>:D -8:W {align1}; /* X offset for column 0: 0 * 16 - 8 */
+(f0.0)add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D 16:W {align1}; /* Y offset */
+(f0.0)mov (1) read1_header.0<1>:D -4:W {align1}; /* X offset for column 0: 0 * 16 - 4 */
+(f0.0)add (1) read1_header.4<1>:D read1_header.4<0,1,0>:D 16:W {align1}; /* Y offset */
+/* X = 0, Y += 16 */
+(f0.0)mov (1) vme_m0.8<1>:UW 0:UW {align1};
+(f0.0)add (1) vme_m0.10<1>:UW vme_m0.10<0,1,0>:UW 16:UW {align1};
+
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 1:uw {align1}; /* the new offset */
+
+add.z.f0.0 (1) num_macroblocks<1>:w num_macroblocks<0,1,0>:w -1:w {align1} ; /* one macroblock done; f0.0 is set when the count reaches zero */
+(-f0.0)jmpi (1) __INTRA_LOOP ; /* loop again while f0.0 is clear */
+
+__EXIT:
/*
* kill thread
*/
-#include "gen6_vme_header.inc"
+#include "vme.inc"
#include "intra_frame.asm"
{ 0x00800001, 0x24000061, 0x00000000, 0x00000000 },
{ 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
- { 0x00200041, 0x24002e25, 0x004500a0, 0x00100010 },
+ { 0x00800001, 0x24800061, 0x00000000, 0x00000000 },
+ { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 },
{ 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
{ 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
{ 0x00000001, 0x240800e1, 0x00000000, 0x0000001f },
{ 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 },
+ { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc },
+ { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 },
+ { 0x00000001, 0x24340231, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
+ { 0x00000001, 0x24540231, 0x00000014, 0x00000000 },
+ { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 },
+ { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 },
+ { 0x00000001, 0x24940231, 0x00000014, 0x00000000 },
{ 0x00600001, 0x20000022, 0x008d0400, 0x00000000 },
{ 0x04600031, 0x22401cd1, 0x00000000, 0x02188004 },
- { 0x00200041, 0x24002e25, 0x004500a0, 0x00100010 },
- { 0x00000040, 0x24003ca5, 0x00000400, 0xfffcfffc },
- { 0x00000001, 0x240800e1, 0x00000000, 0x000f0003 },
- { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
- { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 },
+ { 0x00600001, 0x20000022, 0x008d0420, 0x00000000 },
{ 0x04600031, 0x22801cd1, 0x00000000, 0x02288004 },
- { 0x00200041, 0x24082e29, 0x004500a0, 0x00100010 },
- { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
- { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 },
+ { 0x00600001, 0x20000022, 0x008d0440, 0x00000000 },
+ { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 },
{ 0x02000010, 0x20002e28, 0x000000a4, 0x00010001 },
- { 0x00010001, 0x243c00f1, 0x00000000, 0x00000006 },
+ { 0x00010001, 0x247c00f1, 0x00000000, 0x00000006 },
{ 0x02000010, 0x20002e28, 0x000000a0, 0x00000000 },
- { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000060 },
+ { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000060 },
{ 0x02000010, 0x20002e28, 0x000000a1, 0x00000000 },
- { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000010 },
+ { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000010 },
{ 0x02000041, 0x20004628, 0x000000a0, 0x000000a1 },
- { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000004 },
- { 0x00000040, 0x24402e2d, 0x000000a0, 0x00010001 },
- { 0x00000040, 0x2440352d, 0x000000a2, 0x00004440 },
- { 0x02000041, 0x200045a0, 0x00000440, 0x000000a1 },
- { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000008 },
- { 0x00600001, 0x20200022, 0x008d0420, 0x00000000 },
+ { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000004 },
+ { 0x00000040, 0x24c02e2d, 0x000000a0, 0x00010001 },
+ { 0x00000040, 0x24c0352d, 0x000000a2, 0x000044c0 },
+ { 0x02000041, 0x200045a0, 0x000004c0, 0x000000a1 },
+ { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000008 },
+ { 0x00600001, 0x20200022, 0x008d0460, 0x00000000 },
{ 0x00600001, 0x20400062, 0x00000000, 0x00000000 },
{ 0x00600001, 0x20400022, 0x008d0240, 0x00000000 },
{ 0x00600001, 0x206000e2, 0x00000000, 0x00000000 },
{ 0x00800001, 0x20600232, 0x00cf0283, 0x00000000 },
{ 0x00000001, 0x20700062, 0x00000000, 0x11111111 },
{ 0x08600031, 0x21801cdd, 0x00000000, 0x08184000 },
- { 0x00000041, 0x24684521, 0x000000a2, 0x000000a1 },
- { 0x00000040, 0x24684421, 0x00000468, 0x000000a0 },
- { 0x00000001, 0x24740231, 0x00000014, 0x00000000 },
- { 0x00600001, 0x20000022, 0x008d0460, 0x00000000 },
+ { 0x00600001, 0x20000022, 0x008d0480, 0x00000000 },
{ 0x00000001, 0x20200022, 0x00000180, 0x00000000 },
{ 0x00000001, 0x20240022, 0x00000190, 0x00000000 },
{ 0x00000001, 0x20280022, 0x00000194, 0x00000000 },
{ 0x00000001, 0x202c0022, 0x00000198, 0x00000000 },
{ 0x05800031, 0x22001cdd, 0x00000000, 0x041b0003 },
+ { 0x00000040, 0x24003ca5, 0x00000400, 0x00100010 },
+ { 0x00000040, 0x24203ca5, 0x00000420, 0x00100010 },
+ { 0x00000040, 0x24482d29, 0x00000448, 0x00100010 },
+ { 0x00000040, 0x20a02e31, 0x000000a0, 0x00010001 },
+ { 0x01000010, 0x20004528, 0x000000a2, 0x000000a0 },
+ { 0x00010001, 0x20a00171, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x20a12e31, 0x000000a1, 0x00010001 },
+ { 0x00010001, 0x240001e5, 0x00000000, 0xfff8fff8 },
+ { 0x00010040, 0x24043ca5, 0x00000404, 0x00100010 },
+ { 0x00010001, 0x242001e5, 0x00000000, 0xfffcfffc },
+ { 0x00010040, 0x24243ca5, 0x00000424, 0x00100010 },
+ { 0x00010001, 0x24480169, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 },
+ { 0x00000040, 0x24882c21, 0x00000488, 0x00010001 },
+ { 0x01000040, 0x20a63dad, 0x000000a6, 0xffffffff },
+ { 0x00110020, 0x34001c00, 0x00001400, 0xffffffa2 },
{ 0x00600001, 0x20000022, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x24001cc8, 0x00000000, 0x82000010 },
-#include "gen7_vme_header.inc"
+#include "vme.inc"
#include "intra_frame.asm"
{ 0x00800001, 0x24000061, 0x00000000, 0x00000000 },
{ 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
- { 0x00200041, 0x24002e25, 0x004500a0, 0x00100010 },
+ { 0x00800001, 0x24800061, 0x00000000, 0x00000000 },
+ { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 },
{ 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
{ 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
{ 0x00000001, 0x240800e1, 0x00000000, 0x0000001f },
{ 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 },
+ { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc },
+ { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 },
+ { 0x00000001, 0x24340231, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
+ { 0x00000001, 0x24540231, 0x00000014, 0x00000000 },
+ { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 },
+ { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 },
+ { 0x00000001, 0x24940231, 0x00000014, 0x00000000 },
{ 0x00600001, 0x28000021, 0x008d0400, 0x00000000 },
{ 0x04600031, 0x22401cb1, 0x00000800, 0x02190004 },
- { 0x00200041, 0x24002e25, 0x004500a0, 0x00100010 },
- { 0x00000040, 0x24003ca5, 0x00000400, 0xfffcfffc },
- { 0x00000001, 0x240800e1, 0x00000000, 0x000f0003 },
- { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
- { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 },
{ 0x04600031, 0x22801cb1, 0x00000800, 0x02290004 },
- { 0x00200041, 0x24082e29, 0x004500a0, 0x00100010 },
- { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
- { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+ { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 },
{ 0x02000010, 0x20002e28, 0x000000a4, 0x00010001 },
- { 0x00010001, 0x243c00f1, 0x00000000, 0x00000006 },
+ { 0x00010001, 0x247c00f1, 0x00000000, 0x00000006 },
{ 0x02000010, 0x20002e28, 0x000000a0, 0x00000000 },
- { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000060 },
+ { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000060 },
{ 0x02000010, 0x20002e28, 0x000000a1, 0x00000000 },
- { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000010 },
+ { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000010 },
{ 0x02000041, 0x20004628, 0x000000a0, 0x000000a1 },
- { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000004 },
- { 0x00000040, 0x24402e2d, 0x000000a0, 0x00010001 },
- { 0x00000040, 0x2440352d, 0x000000a2, 0x00004440 },
- { 0x02000041, 0x200045a0, 0x00000440, 0x000000a1 },
- { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000008 },
- { 0x00600001, 0x28200021, 0x008d0420, 0x00000000 },
+ { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000004 },
+ { 0x00000040, 0x24c02e2d, 0x000000a0, 0x00010001 },
+ { 0x00000040, 0x24c0352d, 0x000000a2, 0x000044c0 },
+ { 0x02000041, 0x200045a0, 0x000004c0, 0x000000a1 },
+ { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000008 },
+ { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
{ 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x28600021, 0x008d0240, 0x00000000 },
{ 0x00600001, 0x288000e1, 0x00000000, 0x00000000 },
{ 0x00800001, 0x28800231, 0x00cf0283, 0x00000000 },
{ 0x00000001, 0x28900061, 0x00000000, 0x11111111 },
{ 0x08600031, 0x21801cbd, 0x00000800, 0x0a184000 },
- { 0x00000041, 0x24684521, 0x000000a2, 0x000000a1 },
- { 0x00000040, 0x24684421, 0x00000468, 0x000000a0 },
- { 0x00000001, 0x24740231, 0x00000014, 0x00000000 },
- { 0x00600001, 0x28000021, 0x008d0460, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
{ 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
{ 0x00000001, 0x28240021, 0x00000190, 0x00000000 },
{ 0x00000001, 0x28280021, 0x00000194, 0x00000000 },
{ 0x00000001, 0x282c0021, 0x00000198, 0x00000000 },
{ 0x0a800031, 0x20001cac, 0x00000800, 0x040a0003 },
+ { 0x00000040, 0x24003ca5, 0x00000400, 0x00100010 },
+ { 0x00000040, 0x24203ca5, 0x00000420, 0x00100010 },
+ { 0x00000040, 0x24482d29, 0x00000448, 0x00100010 },
+ { 0x00000040, 0x20a02e31, 0x000000a0, 0x00010001 },
+ { 0x01000010, 0x20004528, 0x000000a2, 0x000000a0 },
+ { 0x00010001, 0x20a00171, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x20a12e31, 0x000000a1, 0x00010001 },
+ { 0x00010001, 0x240001e5, 0x00000000, 0xfff8fff8 },
+ { 0x00010040, 0x24043ca5, 0x00000404, 0x00100010 },
+ { 0x00010001, 0x242001e5, 0x00000000, 0xfffcfffc },
+ { 0x00010040, 0x24243ca5, 0x00000424, 0x00100010 },
+ { 0x00010001, 0x24480169, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 },
+ { 0x00000040, 0x24882c21, 0x00000488, 0x00010001 },
+ { 0x01000040, 0x20a63dad, 0x000000a6, 0xffffffff },
+ { 0x00110020, 0x34001c00, 0x00001400, 0xffffffa2 },
{ 0x00600001, 0x28000021, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x24001ca8, 0x00000800, 0x82000010 },
define(`INTRA_PREDICTORE_MODE', `0x11111111:UD')
+#ifdef DEV_SNB
+
define(`OBW_CACHE_TYPE', `5')
+#else
+
+define(`OBW_CACHE_TYPE', `10')
+
+#endif
+
define(`OBW_MESSAGE_TYPE', `8')
define(`OBW_BIND_IDX', `BIND_IDX_OUTPUT')
define(`OBW_CONTROL_2', `2') /* 2 OWords */
define(`OBW_CONTROL_3', `3') /* 4 OWords */
+#ifdef DEV_SNB
+
define(`OBW_WRITE_COMMIT_CATEGORY', `1') /* write commit on Sandybrige */
+#else
+
+define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */
+
+#endif
+
+
define(`OBW_HEADER_PRESENT', `1')
/* GRF registers
define(`orig_x_ub', `inline_reg0.0') /* in macroblock */
define(`orig_y_ub', `inline_reg0.1')
define(`transform_8x8_ub', `inline_reg0.4')
+define(`num_macroblocks', `inline_reg0.6')
/*
* GRF 6~11 -- reserved
define(`vme_wb1', `r13')
define(`vme_wb2', `r14')
define(`vme_wb3', `r15')
-
+
+#ifdef DEV_SNB
/*
* GRF 16 -- write back for Oword Block Write message with write commit bit
*/
define(`obw_wb', `r16')
define(`obw_wb_length', `1')
+#else
+
+/*
+ * GRF 16 -- reserved
+ */
+define(`obw_wb', `null<1>:W')
+define(`obw_wb_length', `0')
+
+#endif
+
/*
* GRF 18~21 -- Intra Neighbor Edge Pixels
*/
* temporary registers
*/
define(`tmp_reg0', `r32')
+define(`read0_header', `tmp_reg0') /* header of the neighbor ROW media read (intra) */
define(`tmp_reg1', `r33')
-define(`intra_part_mask_ub', `tmp_reg1.28')
-define(`mb_intra_struct_ub', `tmp_reg1.29')
+define(`read1_header', `tmp_reg1') /* header of the neighbor COL media read (intra) */
define(`tmp_reg2', `r34')
-define(`tmp_x_w', `tmp_reg2.0')
-define(`tmp_reg3', `r35')
+define(`vme_m0', `tmp_reg2') /* staged copy of VME message m0, updated once per macroblock */
+define(`tmp_reg3', `r35')
+define(`vme_m1', `tmp_reg3') /* staged copy of VME message m1 */
+define(`intra_flag', `vme_m1.28') /* same offset as the next entry; written as one UW to clear both byte fields below */
+define(`intra_part_mask_ub', `vme_m1.28')
+define(`mb_intra_struct_ub', `vme_m1.29')
+define(`tmp_reg4', `r36')
+define(`obw_m0', `tmp_reg4') /* staged header of the Oword Block Write; dword at .8 is the output offset */
+define(`tmp_reg5', `r37')
+define(`obw_m1', `tmp_reg5') /* staged payload of the Oword Block Write (inter) */
+define(`tmp_reg6', `r38')
+define(`tmp_x_w', `tmp_reg6.0')
/*
* MRF registers
*/
+#ifdef DEV_SNB
+
define(`msg_ind', `0')
define(`msg_reg0', `m0') /* m0 */
define(`msg_reg1', `m1') /* m1 */
define(`msg_reg3', `m3') /* m3 */
define(`msg_reg4', `m4') /* m4 */
+#else
+
+define(`msg_ind', `64') /* in this branch the message payload starts at g64 */
+define(`msg_reg0', `g64') /* m0 equivalent */
+define(`msg_reg1', `g65') /* m1 equivalent */
+define(`msg_reg2', `g66') /* m2 equivalent */
+define(`msg_reg3', `g67') /* m3 equivalent */
+define(`msg_reg4', `g68') /* m4 equivalent */
+
+#endif
+
/*
* VME message payload
*/
+
+#ifdef DEV_SNB
+
define(`vme_msg_length', `4')
-define(`vme_intra_wb_length', `1')
define(`vme_inter_wb_length', `4')
+
+#else
+
+define(`vme_msg_length', `5')
+define(`vme_inter_wb_length', `6')
+
+#endif
+
+define(`vme_intra_wb_length', `1')
+
define(`vme_msg_ind', `msg_ind')
define(`vme_msg_0', `msg_reg0')
define(`vme_msg_1', `msg_reg1')
define(`vme_msg_2', `msg_reg2')
+
+#ifdef DEV_SNB
+
define(`vme_msg_3', `vme_msg_2')
define(`vme_msg_4', `msg_reg3')
+#else
+
+define(`vme_msg_3', `msg_reg3')
+define(`vme_msg_4', `msg_reg4')
+
+#endif