From cd0ea78c532dcc9bcb39710bd2a336cee955a9c8 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 15 Mar 2012 09:46:37 +0800 Subject: [PATCH] VME: Handle multiple macroblocks in a single thread In addition, merge include files for GEN6 & GEN7 Signed-off-by: Xiang, Haihao --- src/gen6_vme.c | 97 +++++------- src/shaders/vme/Makefile.am | 4 +- src/shaders/vme/batchbuffer.asm | 99 +++++-------- src/shaders/vme/batchbuffer.g6a | 2 +- src/shaders/vme/batchbuffer.g6b | 70 +++------ src/shaders/vme/batchbuffer.g7a | 2 +- src/shaders/vme/batchbuffer.g7b | 70 +++------ ...gen6_batchbuffer_header.inc => batchbuffer.inc} | 77 +++++++--- src/shaders/vme/gen7_batchbuffer_header.inc | 139 ----------------- src/shaders/vme/gen7_vme_header.inc | 165 --------------------- src/shaders/vme/inter_frame.asm | 86 +++++++---- src/shaders/vme/inter_frame.g6a | 2 +- src/shaders/vme/inter_frame.g6b | 55 ++++--- src/shaders/vme/inter_frame.g7a | 2 +- src/shaders/vme/inter_frame.g7b | 46 +++--- src/shaders/vme/intra_frame.asm | 69 ++++++--- src/shaders/vme/intra_frame.g6a | 2 +- src/shaders/vme/intra_frame.g6b | 62 +++++--- src/shaders/vme/intra_frame.g7a | 2 +- src/shaders/vme/intra_frame.g7b | 62 +++++--- src/shaders/vme/{gen6_vme_header.inc => vme.inc} | 85 ++++++++++- 21 files changed, 511 insertions(+), 687 deletions(-) rename src/shaders/vme/{gen6_batchbuffer_header.inc => batchbuffer.inc} (73%) delete mode 100644 src/shaders/vme/gen7_batchbuffer_header.inc delete mode 100644 src/shaders/vme/gen7_vme_header.inc rename src/shaders/vme/{gen6_vme_header.inc => vme.inc} (75%) diff --git a/src/gen6_vme.c b/src/gen6_vme.c index 3031941..5145e72 100644 --- a/src/gen6_vme.c +++ b/src/gen6_vme.c @@ -374,72 +374,56 @@ gen6_vme_fill_vme_batchbuffer(VADriverContextP ctx, int transform_8x8_mode_flag, struct intel_encoder_context *encoder_context) { - struct intel_batchbuffer *batch = encoder_context->base.batch; - int mb_x, mb_y, i; + struct gen6_vme_context *vme_context = 
encoder_context->vme_context; int total_mbs = mb_width * mb_height; - int number_mb_cmds = 512; - int starting_mb = 0; - int last_object = 0; + int number_mb_cmds = 128; + int mb_x = 0, mb_y = 0; + int i, count = 0; + unsigned int *command_ptr; - for (i = 0; i < total_mbs / number_mb_cmds; i++) { - mb_x = starting_mb % mb_width; - mb_y = starting_mb / mb_width; - last_object = (total_mbs - starting_mb) == number_mb_cmds; - starting_mb += number_mb_cmds; + dri_bo_map(vme_context->vme_batchbuffer.bo, 1); + command_ptr = vme_context->vme_batchbuffer.bo->virtual; - BEGIN_BATCH(batch, 9); - - OUT_BATCH(batch, CMD_MEDIA_OBJECT | (9 - 2)); - OUT_BATCH(batch, VME_BATCHBUFFER); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); + for (i = 0; i < total_mbs / number_mb_cmds; i++) { + mb_x = count % mb_width; + mb_y = count / mb_width; + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = kernel; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; /*inline data */ - OUT_BATCH(batch, - kernel << 24 | - transform_8x8_mode_flag << 16 | - mb_width); - OUT_BATCH(batch, - number_mb_cmds << 16 | - mb_y << 8 | - mb_x); - OUT_BATCH(batch, last_object); - - ADVANCE_BATCH(batch); - } + *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); + *command_ptr++ = (number_mb_cmds << 16 | transform_8x8_mode_flag); - if (!last_object) { - number_mb_cmds = total_mbs % number_mb_cmds; - mb_x = starting_mb % mb_width; - mb_y = starting_mb / mb_width; - last_object = 1; - starting_mb += number_mb_cmds; + count += number_mb_cmds; + } - BEGIN_BATCH(batch, 9); + number_mb_cmds = total_mbs - count; - OUT_BATCH(batch, CMD_MEDIA_OBJECT | (9 - 2)); - OUT_BATCH(batch, VME_BATCHBUFFER); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); + if (number_mb_cmds) { + mb_x = count % mb_width; + mb_y = count / mb_width; + + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + 
*command_ptr++ = kernel; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; + *command_ptr++ = 0; /*inline data */ - OUT_BATCH(batch, - kernel << 24 | - transform_8x8_mode_flag << 16 | - mb_width); - OUT_BATCH(batch, - number_mb_cmds << 16 | - mb_y << 8 | - mb_x); - OUT_BATCH(batch, last_object); - - ADVANCE_BATCH(batch); + *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); + *command_ptr++ = (number_mb_cmds << 16 | transform_8x8_mode_flag); } + *command_ptr++ = 0; + *command_ptr++ = MI_BATCH_BUFFER_END; + + dri_bo_unmap(vme_context->vme_batchbuffer.bo); } static void gen6_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) @@ -479,15 +463,14 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx, int width_in_mbs = pSequenceParameter->picture_width_in_mbs; int height_in_mbs = pSequenceParameter->picture_height_in_mbs; - intel_batchbuffer_start_atomic(batch, 0x1000); - - gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); gen6_vme_fill_vme_batchbuffer(ctx, encode_state, width_in_mbs, height_in_mbs, is_intra ? 
VME_INTRA_SHADER : VME_INTER_SHADER, pPicParameter->pic_fields.bits.transform_8x8_mode_flag, encoder_context); + + intel_batchbuffer_start_atomic(batch, 0x1000); gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); BEGIN_BATCH(batch, 2); OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am index 1026509..80a4663 100644 --- a/src/shaders/vme/Makefile.am +++ b/src/shaders/vme/Makefile.am @@ -2,12 +2,12 @@ VME_CORE = batchbuffer.asm intra_frame.asm inter_frame.asm INTEL_G6B = batchbuffer.g6b intra_frame.g6b inter_frame.g6b INTEL_G6A = batchbuffer.g6a intra_frame.g6a inter_frame.g6a -INTEL_GEN6_INC = gen6_batchbuffer_header.inc gen6_vme_header.inc +INTEL_GEN6_INC = batchbuffer.inc vme.inc INTEL_GEN6_ASM = $(INTEL_G6A:%.g6a=%.gen6.asm) INTEL_G7B = batchbuffer.g7b intra_frame.g7b inter_frame.g7b INTEL_G7A = batchbuffer.g7a intra_frame.g7a inter_frame.g7a -INTEL_GEN7_INC = gen7_batchbuffer_header.inc gen7_vme_header.inc +INTEL_GEN7_INC = batchbuffer.inc vme.inc INTEL_GEN7_ASM = $(INTEL_G7A:%.g7a=%.gen7.asm) TARGETS = diff --git a/src/shaders/vme/batchbuffer.asm b/src/shaders/vme/batchbuffer.asm index 8e1ed51..b8ba3b2 100644 --- a/src/shaders/vme/batchbuffer.asm +++ b/src/shaders/vme/batchbuffer.asm @@ -27,58 +27,29 @@ /* * __START */ -__INTER_START: - and.z.f0.1 (1) remainder_cmds<1>:uw total_mbs<0,1,0>:uw 0x0003:uw {align1}; - and.z.f0.0 (1) total_mbs<1>:uw total_mbs<0,1,0>:uw 0xfffc:uw {align1}; - +__START: mov (16) tmp_reg0<1>:ud 0x0:ud {align1} ; + mov (16) tmp_reg2<1>:ud 0x0:ud {align1} ; + mov (1) obw_header.20<1>:ub thread_id_ub {align1}; /* dispatch id */ mov (8) media_object_ud<1>:ud 0x0:ud {align1} ; mov (1) media_object0_ud<1>:ud CMD_MEDIA_OBJECT {align1} ; mov (1) media_object1_ud<1>:ud mtype_ub<0,1,0>ub {align1}; mov (1) media_object6_width<1>:uw width_in_mb<0,1,0>:uw {align1}; - mov (1) media_object7_ud<1>:ud transform_8x8_ub<0,1,0>ub {align1}; + mov (1) media_object7_flag<1>:uw 
transform_8x8_ub<0,1,0>ub {align1}; + mov (1) media_object7_num_mbs<1>:uw NUM_MACROBLOCKS_PER_COMMAND:uw {align1} ; - mul (1) tmp_reg0.8<1>:ud width_in_mb<0,1,0>:uw mb_y<0,1,0>:ub {align1}; - add (1) tmp_reg0.8<1>:ud tmp_reg0.8<0,1,0>:ud mb_x<0,1,0>:ub {align1}; - mul (1) tmp_reg0.8<1>:ud tmp_reg0.8<0,1,0>:ud 0x2:ud {align1} ; - mov (1) tmp_reg0.20<1>:ub thread_id_ub {align1}; /* dispatch id */ - - (f0.0)jmpi (1) __REMAINDER ; + mov (1) width_per_row<1>:ud width_in_mb<0,1,0>:uw {align1} ; + and.z.f0.1 (1) remainder_cmds<1>:ud total_mbs<0,1,0>:ud (NUM_MACROBLOCKS_PER_COMMAND - 1):ud {align1} ; + and.z.f0.0 (1) total_mbs<1>:ud total_mbs<0,1,0>:ud -NUM_MACROBLOCKS_PER_COMMAND:ud {align1} ; -__CMD_LOOP: - mov (8) msg_reg0.0<1>:ud tmp_reg0<8,8,1>:ud {align1}; - add (1) tmp_reg0.8<1>:ud tmp_reg0.8<0,1,0>:ud 8:uw {align1} ; + (f0.0)jmpi (1) __REMAINDER ; - mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ; +__CMD_LOOP: + mov (8) msg_reg0.0<1>:ud obw_header<8,8,1>:ud {align1}; mov (8) msg_reg1<1>:ud media_object_ud<8,8,1>:ud {align1}; - add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1}; - cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1}; - (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ; - (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ; - mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ; - mov (8) msg_reg2<1>:ud media_object_ud<8,8,1>:ud {align1}; - add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1}; - cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1}; - (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ; - (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ; - - mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ; - mov (8) msg_reg3<1>:ud media_object_ud<8,8,1>:ud {align1}; - add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1}; - cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1}; - (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ; - (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ; - - mov (1) media_object6_xy<1>:uw 
mb_xy<1>:uw {align1} ; - mov (8) msg_reg4<1>:ud media_object_ud<8,8,1>:ud {align1}; - add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1}; - cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1}; - (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ; - (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ; - -/* bind index 5, write 8 oword, msg type: 8(OWord Block Write) */ +/* bind index 5, write 2 oword, msg type: 8(OWord Block Write) */ send (16) msg_ind obw_wb @@ -86,34 +57,35 @@ send (16) data_port( OBW_CACHE_TYPE, OBW_MESSAGE_TYPE, - OBW_CONTROL_4, + OBW_CONTROL_2, OBW_BIND_IDX, OBW_WRITE_COMMIT_CATEGORY, OBW_HEADER_PRESENT ) - mlen 5 + mlen 2 rlen obw_wb_length {align1}; + + /* (x, y) of the first macroblock */ + add (1) count<1>:ud count<0,1,0>:ud NUM_MACROBLOCKS_PER_COMMAND:uw {align1} ; + math (1) quotient<1>:ud count<0,1,0>:ud width_per_row<0,1,0>:ud intdivmod {align1} ; + shl (1) quotient<1>:ud quotient<0,1,0>:ud 8:uw {align1} ; + add (1) quotient<1>:ud quotient<0,1,0>:ud remainder<0,1,0>:ud {align1} ; + mov (1) media_object6_xy<1>:uw quotient<0,1,0>:uw {align1} ; - - add.z.f0.0 (1) total_mbs<1>:w total_mbs<0,1,0>:w -4:w {align1}; + /* the new offset */ + add (1) obw_header.8<1>:ud obw_header.8<0,1,0>:ud 2:uw {align1} ; + + add.z.f0.0 (1) total_mbs<1>:w total_mbs<0,1,0>:w -NUM_MACROBLOCKS_PER_COMMAND:w {align1} ; (-f0.0)jmpi (1) __CMD_LOOP ; - + __REMAINDER: - (f0.1)jmpi (1) __DONE ; + (f0.1)jmpi (1) __DONE ; -__REMAINDER_LOOP: - mov (8) msg_reg0.0<1>:ud tmp_reg0<8,8,1>:ud {align1} ; - add (1) tmp_reg0.8<1>:ud tmp_reg0.8<0,1,0>:ud 2:uw {align1} ; - - mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ; + mov (1) media_object7_num_mbs<1>:uw remainder_cmds<0,1,0>:uw {align1} ; + mov (8) msg_reg0.0<1>:ud obw_header<8,8,1>:ud {align1}; mov (8) msg_reg1<1>:ud media_object_ud<8,8,1>:ud {align1}; - add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1}; - cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1}; - (f0.0)mov (1) mb_x<1>:ub 0:uw 
{align1} ; - (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ; - -/* bind index 5, write 2 oword, msg type: 8(OWord Block Write) */ + send (16) msg_ind obw_wb @@ -129,17 +101,14 @@ send (16) mlen 2 rlen obw_wb_length {align1}; - - add.z.f0.1 (1) remainder_cmds<1>:w remainder_cmds<0,1,0>:w -1:w; - (-f0.1)jmpi (1) __REMAINDER_LOOP ; + + /* the new offset */ + add (1) obw_header.8<1>:ud obw_header.8<0,1,0>:ud 2:uw {align1} ; __DONE: - cmp.e.f0.0 (1) null<1>:uw last_object<0,1,0>:uw 1:uw {align1}; - (-f0.0)jmpi (1) __EXIT ; - /* bind index 5, write 1 oword, msg type: 8(OWord Block Write) */ - mov (8) msg_reg0.0<1>:ud tmp_reg0<8,8,1>:ud {align1} ; + mov (8) msg_reg0.0<1>:ud obw_header<8,8,1>:ud {align1} ; mov (4) msg_reg1.0<1>:ud 0x0:ud {align1} ; mov (1) msg_reg1.4<1>:ud MI_BATCH_BUFFER_END {align1} ; diff --git a/src/shaders/vme/batchbuffer.g6a b/src/shaders/vme/batchbuffer.g6a index 7b29c10..08ec2bc 100644 --- a/src/shaders/vme/batchbuffer.g6a +++ b/src/shaders/vme/batchbuffer.g6a @@ -24,6 +24,6 @@ * Xiang Haihao */ -#include "gen6_batchbuffer_header.inc" +#include "batchbuffer.inc" #include "batchbuffer.asm" diff --git a/src/shaders/vme/batchbuffer.g6b b/src/shaders/vme/batchbuffer.g6b index 26fd464..cd3e8cc 100644 --- a/src/shaders/vme/batchbuffer.g6b +++ b/src/shaders/vme/batchbuffer.g6b @@ -1,59 +1,33 @@ - { 0x01000005, 0x22202d29, 0x020000a6, 0x00030003 }, - { 0x01000005, 0x20a62d29, 0x000000a6, 0xfffcfffc }, { 0x00800001, 0x21000061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x21400061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x21140231, 0x00000014, 0x00000000 }, { 0x00600001, 0x22000061, 0x00000000, 0x00000000 }, { 0x00000001, 0x22000061, 0x00000000, 0x71000006 }, - { 0x00000001, 0x22040221, 0x000000a3, 0x00000000 }, - { 0x00000001, 0x221a0129, 0x000000a0, 0x00000000 }, - { 0x00000001, 0x221c0221, 0x000000a2, 0x00000000 }, - { 0x00000041, 0x21084521, 0x000000a0, 0x000000a5 }, - { 0x00000040, 0x21084421, 0x00000108, 0x000000a4 }, - { 0x00000041, 
0x21080c21, 0x00000108, 0x00000002 }, - { 0x00000001, 0x21140231, 0x00000014, 0x00000000 }, - { 0x00010020, 0x34001c00, 0x00001400, 0x0000003a }, + { 0x00000001, 0x22040221, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x221a0129, 0x000000a6, 0x00000000 }, + { 0x00000001, 0x221c0229, 0x000000a4, 0x00000000 }, + { 0x00000001, 0x221e0169, 0x00000000, 0x02000200 }, + { 0x00000001, 0x21280121, 0x000000a6, 0x00000000 }, + { 0x01000005, 0x21240c21, 0x020000a0, 0x000001ff }, + { 0x01000005, 0x20a00c21, 0x000000a0, 0xfffffe00 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000016 }, { 0x00600001, 0x20000022, 0x008d0100, 0x00000000 }, - { 0x00000040, 0x21082c21, 0x00000108, 0x00080008 }, - { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 }, { 0x00600001, 0x20200022, 0x008d0200, 0x00000000 }, - { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 }, - { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 }, - { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 }, - { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 }, - { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 }, - { 0x00600001, 0x20400022, 0x008d0200, 0x00000000 }, - { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 }, - { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 }, - { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 }, - { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 }, - { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 }, - { 0x00600001, 0x20600022, 0x008d0200, 0x00000000 }, - { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 }, - { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 }, - { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 }, - { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 }, - { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 }, - { 0x00600001, 0x20800022, 0x008d0200, 0x00000000 }, - { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 }, - { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 }, - { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 }, - { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 }, - { 0x05800031, 
0x22401cdd, 0x00000000, 0x0a1b0405 }, - { 0x01000040, 0x20a63dad, 0x000000a6, 0xfffcfffc }, - { 0x00110020, 0x34001c00, 0x00001400, 0xffffffc6 }, - { 0x00010020, 0x34001c00, 0x02001400, 0x00000016 }, - { 0x00600001, 0x20000022, 0x008d0100, 0x00000000 }, + { 0x05800031, 0x22401cdd, 0x00000000, 0x041b0205 }, + { 0x00000040, 0x21202c21, 0x00000120, 0x02000200 }, + { 0x0b000038, 0x21400421, 0x00000120, 0x00000128 }, + { 0x00000009, 0x21402c21, 0x00000140, 0x00080008 }, + { 0x00000040, 0x21400421, 0x00000140, 0x00000160 }, + { 0x00000001, 0x22180129, 0x00000140, 0x00000000 }, { 0x00000040, 0x21082c21, 0x00000108, 0x00020002 }, - { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 }, + { 0x01000040, 0x20a03dad, 0x000000a0, 0xfe00fe00 }, + { 0x00110020, 0x34001c00, 0x00001400, 0xffffffea }, + { 0x00010020, 0x34001c00, 0x02001400, 0x0000000a }, + { 0x00000001, 0x221e0129, 0x00000124, 0x00000000 }, + { 0x00600001, 0x20000022, 0x008d0100, 0x00000000 }, { 0x00600001, 0x20200022, 0x008d0200, 0x00000000 }, - { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 }, - { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 }, - { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 }, - { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 }, { 0x05800031, 0x22401cdd, 0x00000000, 0x041b0205 }, - { 0x01000040, 0x22203dad, 0x02000220, 0xffffffff }, - { 0x00110020, 0x34001c00, 0x02001400, 0xffffffea }, - { 0x01000010, 0x20002d28, 0x000000a8, 0x00010001 }, - { 0x00110020, 0x34001c00, 0x00001400, 0x00000008 }, + { 0x00000040, 0x21082c21, 0x00000108, 0x00020002 }, { 0x00600001, 0x20000022, 0x008d0100, 0x00000000 }, { 0x00400001, 0x20200062, 0x00000000, 0x00000000 }, { 0x00000001, 0x20240062, 0x00000000, 0x05000000 }, diff --git a/src/shaders/vme/batchbuffer.g7a b/src/shaders/vme/batchbuffer.g7a index cdfbc7e..08ec2bc 100644 --- a/src/shaders/vme/batchbuffer.g7a +++ b/src/shaders/vme/batchbuffer.g7a @@ -24,6 +24,6 @@ * Xiang Haihao */ -#include "gen7_batchbuffer_header.inc" +#include "batchbuffer.inc" #include 
"batchbuffer.asm" diff --git a/src/shaders/vme/batchbuffer.g7b b/src/shaders/vme/batchbuffer.g7b index 159d8fb..1bbbe09 100644 --- a/src/shaders/vme/batchbuffer.g7b +++ b/src/shaders/vme/batchbuffer.g7b @@ -1,59 +1,33 @@ - { 0x01000005, 0x22202d29, 0x020000a6, 0x00030003 }, - { 0x01000005, 0x20a62d29, 0x000000a6, 0xfffcfffc }, { 0x00800001, 0x21000061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x21400061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x21140231, 0x00000014, 0x00000000 }, { 0x00600001, 0x22000061, 0x00000000, 0x00000000 }, { 0x00000001, 0x22000061, 0x00000000, 0x71000006 }, - { 0x00000001, 0x22040221, 0x000000a3, 0x00000000 }, - { 0x00000001, 0x221a0129, 0x000000a0, 0x00000000 }, - { 0x00000001, 0x221c0221, 0x000000a2, 0x00000000 }, - { 0x00000041, 0x21084521, 0x000000a0, 0x000000a5 }, - { 0x00000040, 0x21084421, 0x00000108, 0x000000a4 }, - { 0x00000041, 0x21080c21, 0x00000108, 0x00000002 }, - { 0x00000001, 0x21140231, 0x00000014, 0x00000000 }, - { 0x00010020, 0x34001c00, 0x00001400, 0x0000003a }, + { 0x00000001, 0x22040221, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x221a0129, 0x000000a6, 0x00000000 }, + { 0x00000001, 0x221c0229, 0x000000a4, 0x00000000 }, + { 0x00000001, 0x221e0169, 0x00000000, 0x02000200 }, + { 0x00000001, 0x21280121, 0x000000a6, 0x00000000 }, + { 0x01000005, 0x21240c21, 0x020000a0, 0x000001ff }, + { 0x01000005, 0x20a00c21, 0x000000a0, 0xfffffe00 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x00000016 }, { 0x00600001, 0x28000021, 0x008d0100, 0x00000000 }, - { 0x00000040, 0x21082c21, 0x00000108, 0x00080008 }, - { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 }, { 0x00600001, 0x28200021, 0x008d0200, 0x00000000 }, - { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 }, - { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 }, - { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 }, - { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 }, - { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 }, - { 0x00600001, 0x28400021, 0x008d0200, 0x00000000 }, - { 
0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 }, - { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 }, - { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 }, - { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 }, - { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 }, - { 0x00600001, 0x28600021, 0x008d0200, 0x00000000 }, - { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 }, - { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 }, - { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 }, - { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 }, - { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 }, - { 0x00600001, 0x28800021, 0x008d0200, 0x00000000 }, - { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 }, - { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 }, - { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 }, - { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 }, - { 0x0a800031, 0x20001cac, 0x00000800, 0x0a0a0405 }, - { 0x01000040, 0x20a63dad, 0x000000a6, 0xfffcfffc }, - { 0x00110020, 0x34001c00, 0x00001400, 0xffffffc6 }, - { 0x00010020, 0x34001c00, 0x02001400, 0x00000016 }, - { 0x00600001, 0x28000021, 0x008d0100, 0x00000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0205 }, + { 0x00000040, 0x21202c21, 0x00000120, 0x02000200 }, + { 0x0b000038, 0x21400421, 0x00000120, 0x00000128 }, + { 0x00000009, 0x21402c21, 0x00000140, 0x00080008 }, + { 0x00000040, 0x21400421, 0x00000140, 0x00000160 }, + { 0x00000001, 0x22180129, 0x00000140, 0x00000000 }, { 0x00000040, 0x21082c21, 0x00000108, 0x00020002 }, - { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 }, + { 0x01000040, 0x20a03dad, 0x000000a0, 0xfe00fe00 }, + { 0x00110020, 0x34001c00, 0x00001400, 0xffffffea }, + { 0x00010020, 0x34001c00, 0x02001400, 0x0000000a }, + { 0x00000001, 0x221e0129, 0x00000124, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0100, 0x00000000 }, { 0x00600001, 0x28200021, 0x008d0200, 0x00000000 }, - { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 }, - { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 }, - { 
0x00010001, 0x20a40171, 0x00000000, 0x00000000 }, - { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 }, { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0205 }, - { 0x01000040, 0x22203dad, 0x02000220, 0xffffffff }, - { 0x00110020, 0x34001c00, 0x02001400, 0xffffffea }, - { 0x01000010, 0x20002d28, 0x000000a8, 0x00010001 }, - { 0x00110020, 0x34001c00, 0x00001400, 0x00000008 }, + { 0x00000040, 0x21082c21, 0x00000108, 0x00020002 }, { 0x00600001, 0x28000021, 0x008d0100, 0x00000000 }, { 0x00400001, 0x28200061, 0x00000000, 0x00000000 }, { 0x00000001, 0x28240061, 0x00000000, 0x05000000 }, diff --git a/src/shaders/vme/gen6_batchbuffer_header.inc b/src/shaders/vme/batchbuffer.inc similarity index 73% rename from src/shaders/vme/gen6_batchbuffer_header.inc rename to src/shaders/vme/batchbuffer.inc index d1bf50b..5cb86ab 100644 --- a/src/shaders/vme/gen6_batchbuffer_header.inc +++ b/src/shaders/vme/batchbuffer.inc @@ -31,8 +31,16 @@ define(`BIND_IDX_OUTPUT', `3') define(`BIND_IDX_INEP', `4') define(`BIND_IDX_VME_BATCHBUFFER', `5') +#ifdef DEV_SNB + define(`OBW_CACHE_TYPE', `5') +#else + +define(`OBW_CACHE_TYPE', `10') + +#endif + define(`OBW_MESSAGE_TYPE', `8') define(`OBW_BIND_IDX', `BIND_IDX_VME_BATCHBUFFER') @@ -43,13 +51,23 @@ define(`OBW_CONTROL_2', `2') /* 2 OWords */ define(`OBW_CONTROL_3', `3') /* 4 OWords */ define(`OBW_CONTROL_4', `4') /* 8 OWords */ +#ifdef DEV_SNB + define(`OBW_WRITE_COMMIT_CATEGORY', `1') /* write commit on Sandybrige */ +#else + +define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */ + +#endif + define(`OBW_HEADER_PRESENT', `1') define(`CMD_MEDIA_OBJECT', `0x71000006:UD') define(`MI_BATCH_BUFFER_END', `0x05000000:UD') +define(`NUM_MACROBLOCKS_PER_COMMAND', `512') + /* GRF registers * r0 header * r1~r4 constant buffer (reserved) @@ -73,23 +91,24 @@ define(`thread_id_ub', `r0.20<0,1,0>:UB') /* thread id in payload */ * GRF 5 -- inline data */ define(`inline_reg0', `r5') -define(`width_in_mb', `inline_reg0.0') /* the picture width in 
macroblocks */ -define(`transform_8x8_ub', `inline_reg0.2') /* transform_8x8 flag */ -define(`mtype_ub', `inline_reg0.3') /* 0: INTRA, 1: INTER */ -define(`mb_x', `inline_reg0.4') -define(`mb_y', `inline_reg0.5') -define(`mb_xy', `inline_reg0.4') -define(`total_mbs', `inline_reg0.6') /* the number of macroblock commands - * being processed by the kernel - */ -define(`last_object', `inline_reg0.8') /* the last object flag */ +define(`total_mbs', `inline_reg0.0') /* the number of macroblocks in a picture */ +define(`transform_8x8_ub', `inline_reg0.4') /* transform_8x8 flag */ +define(`mtype_ub', `inline_reg0.5') /* 0: INTRA, 1: INTER */ +define(`width_in_mb', `inline_reg0.6') /* the picture width in macroblocks */ + /* * GRF 8~15 -- temporary registers */ define(`tmp_reg0', `r8') +define(`obw_header', `tmp_reg0') define(`tmp_reg1', `r9') +define(`count', `tmp_reg1.0') +define(`remainder_cmds', `tmp_reg1.4') +define(`width_per_row', `tmp_reg1.8') define(`tmp_reg2', `r10') +define(`quotient', `tmp_reg2') define(`tmp_reg3', `r11') +define(`remainder', `tmp_reg3') define(`tmp_reg4', `r12') define(`tmp_reg5', `r13') define(`tmp_reg6', `r14') @@ -107,28 +126,37 @@ define(`media_object4_ud', `r16.16') define(`media_object5_ud', `r16.20') define(`media_object6_ud', `r16.24') define(`media_object6_xy', `r16.24') +define(`media_object6_x', `r16.24') +define(`media_object6_y', `r16.25') define(`media_object6_width', `r16.26') define(`media_object7_ud', `r16.28') - -/* - * GRF 17 - */ -define(`remainder_cmds', `r17.0') +define(`media_object7_flag', `r16.28') +define(`media_object7_num_mbs', `r16.30') /* * GRF 18 write back for Oword Block Write message */ +#if DEV_SNB + +define(`obw_wb', `r18') +define(`obw_wb_length', `1') + +#else + /* * write commit is removed on Ivybridge */ -define(`obw_wb', `r18') -define(`obw_wb_length', `1') +define(`obw_wb', `null<1>:W') +define(`obw_wb_length', `0') +#endif /* * Message Payload registers */ +#if DEV_SNB + define(`msg_ind', `0') 
define(`msg_reg0', `m0') define(`msg_reg1', `m1') @@ -139,3 +167,18 @@ define(`msg_reg5', `m5') define(`msg_reg6', `m6') define(`msg_reg7', `m7') define(`msg_reg8', `m8') + +#else + +define(`msg_ind', `64') +define(`msg_reg0', `g64') +define(`msg_reg1', `g65') +define(`msg_reg2', `g66') +define(`msg_reg3', `g67') +define(`msg_reg4', `g68') +define(`msg_reg5', `g69') +define(`msg_reg6', `g70') +define(`msg_reg7', `g71') +define(`msg_reg8', `g72') + +#endif diff --git a/src/shaders/vme/gen7_batchbuffer_header.inc b/src/shaders/vme/gen7_batchbuffer_header.inc deleted file mode 100644 index d13620f..0000000 --- a/src/shaders/vme/gen7_batchbuffer_header.inc +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Xiang Haihao - */ - -define(`BIND_IDX_OUTPUT', `0') -define(`BIND_IDX_VME', `1') -define(`BIND_IDX_VME_REF0', `2') -define(`BIND_IDX_VME_REF1', `3') -define(`BIND_IDX_INEP', `4') -define(`BIND_IDX_VME_BATCHBUFFER', `5') - -define(`OBW_CACHE_TYPE', `10') - -define(`OBW_MESSAGE_TYPE', `8') - -define(`OBW_BIND_IDX', `BIND_IDX_VME_BATCHBUFFER') - -define(`OBW_CONTROL_0', `0') /* 1 OWord, low 128 bits */ -define(`OBW_CONTROL_1', `1') /* 1 OWord, high 128 bits */ -define(`OBW_CONTROL_2', `2') /* 2 OWords */ -define(`OBW_CONTROL_3', `3') /* 4 OWords */ -define(`OBW_CONTROL_4', `4') /* 8 OWords */ - -define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */ - -define(`OBW_HEADER_PRESENT', `1') - -define(`CMD_MEDIA_OBJECT', `0x71000006:UD') -define(`MI_BATCH_BUFFER_END', `0x05000000:UD') - -/* GRF registers - * r0 header - * r1~r4 constant buffer (reserved) - * r5 inline data - * r6~r7 reserved - * r8~r15 temporary registers - * r16 write back of Oword Block Write - */ -/* - * GRF 0 -- header - */ -define(`thread_id_ub', `r0.20<0,1,0>:UB') /* thread id in payload */ - -/* - * GRF 1~4 -- Constant Buffer (reserved) - */ - -/* - * GRF 5 -- inline data - */ -define(`inline_reg0', `r5') -define(`width_in_mb', `inline_reg0.0') /* the picture width in macroblocks */ -define(`transform_8x8_ub', `inline_reg0.2') /* transform_8x8 flag */ -define(`mtype_ub', `inline_reg0.3') /* 0: INTRA, 1: INTER */ -define(`mb_x', `inline_reg0.4') -define(`mb_y', `inline_reg0.5') -define(`mb_xy', `inline_reg0.4') -define(`total_mbs', `inline_reg0.6') /* the number of macroblock commands - * being processed by the kernel - */ -define(`last_object', `inline_reg0.8') /* the last object flag */ -/* - * GRF 8~15 -- temporary registers - */ -define(`tmp_reg0', `r8') -define(`tmp_reg1', `r9') -define(`tmp_reg2', `r10') -define(`tmp_reg3', `r11') -define(`tmp_reg4', `r12') -define(`tmp_reg5', `r13') -define(`tmp_reg6', `r14') -define(`tmp_reg7', `r15') - -/* - * GRF 16 - */ 
-define(`media_object_ud', `r16.0') -define(`media_object0_ud', `r16.0') -define(`media_object1_ud', `r16.4') -define(`media_object2_ud', `r16.8') -define(`media_object3_ud', `r16.12') -define(`media_object4_ud', `r16.16') -define(`media_object5_ud', `r16.20') -define(`media_object6_ud', `r16.24') -define(`media_object6_xy', `r16.24') -define(`media_object6_width', `r16.26') -define(`media_object7_ud', `r16.28') - -/* - * GRF 17 - */ -define(`remainder_cmds', `r17.0') - -/* - * GRF 16 write back for Oword Block Write message - */ - -/* - * write commit is removed on Ivybridge - */ -define(`obw_wb', `null<1>:W') -define(`obw_wb_length', `0') - - -/* - * Message Payload registers - */ -define(`msg_ind', `64') -define(`msg_reg0', `g64') -define(`msg_reg1', `g65') -define(`msg_reg2', `g66') -define(`msg_reg3', `g67') -define(`msg_reg4', `g68') -define(`msg_reg5', `g69') -define(`msg_reg6', `g70') -define(`msg_reg7', `g71') -define(`msg_reg8', `g72') diff --git a/src/shaders/vme/gen7_vme_header.inc b/src/shaders/vme/gen7_vme_header.inc deleted file mode 100644 index 5ea1811..0000000 --- a/src/shaders/vme/gen7_vme_header.inc +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright © <2010>, Intel Corporation. - * - * This program is licensed under the terms and conditions of the - * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at - * http://www.opensource.org/licenses/eclipse-1.0.php. 
- * - */ -// Modual name: ME_header.inc -// -// Global symbols define -// - -/* - * Constant - */ -define(`VME_MESSAGE_TYPE_INTER', `1') -define(`VME_MESSAGE_TYPE_INTRA', `2') -define(`VME_MESSAGE_TYPE_MIXED', `3') - -define(`BLOCK_32X1', `0x0000001F') -define(`BLOCK_4X16', `0x000F0003') - -define(`LUMA_INTRA_16x16_DISABLE', `0x1') -define(`LUMA_INTRA_8x8_DISABLE', `0x2') -define(`LUMA_INTRA_4x4_DISABLE', `0x4') - -define(`INTRA_PRED_AVAIL_FLAG_AE', `0x60') -define(`INTRA_PRED_AVAIL_FLAG_B', `0x10') -define(`INTRA_PRED_AVAIL_FLAG_C', `0x8') -define(`INTRA_PRED_AVAIL_FLAG_D', `0x4') - -define(`BIND_IDX_VME', `0') -define(`BIND_IDX_VME_REF0', `1') -define(`BIND_IDX_VME_REF1', `2') -define(`BIND_IDX_OUTPUT', `3') -define(`BIND_IDX_INEP', `4') - -define(`SUB_PEL_MODE_INTEGER', `0x00000000') -define(`SUB_PEL_MODE_HALF', `0x00001000') -define(`SUB_PEL_MODE_QUARTER', `0x00003000') - -define(`INTER_SAD_NONE', `0x00000000') -define(`INTER_SAD_HAAR', `0x00200000') - -define(`INTRA_SAD_NONE', `0x00000000') -define(`INTRA_SAD_HAAR', `0x00800000') - -define(`INTER_PART_MASK', `0x7E000000') - -define(`REF_REGION_SIZE', `0x2830:UW') - -define(`BI_SUB_MB_PART_MASK', `0x0c000000') -define(`MAX_NUM_MV', `0x00000020') -define(`SEARCH_PATH_LEN', `0x00003F3F') - -define(`INTRA_PREDICTORE_MODE', `0x11111111:UD') - -define(`OBW_CACHE_TYPE', `10') - -define(`OBW_MESSAGE_TYPE', `8') - -define(`OBW_BIND_IDX', `BIND_IDX_OUTPUT') - -define(`OBW_CONTROL_0', `0') /* 1 OWord, low 128 bits */ -define(`OBW_CONTROL_1', `1') /* 1 OWord, high 128 bits */ -define(`OBW_CONTROL_2', `2') /* 2 OWords */ -define(`OBW_CONTROL_3', `3') /* 4 OWords */ - -define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */ - -define(`OBW_HEADER_PRESENT', `1') - -/* GRF registers - * r0 header - * r1~r4 constant buffer (reserved) - * r5 inline data - * r6~r11 reserved - * r12 write back of VME message - * r13 write back of Oword Block Write - */ -/* - * GRF 0 -- header - */ -define(`thread_id_ub', 
`r0.20<0,1,0>:UB') /* thread id in payload */ - -/* - * GRF 1~4 -- Constant Buffer (reserved) - */ - -/* - * GRF 5 -- inline data - */ -define(`inline_reg0', `r5') -define(`w_in_mb_uw', `inline_reg0.2') -define(`orig_xy_ub', `inline_reg0.0') -define(`orig_x_ub', `inline_reg0.0') /* in macroblock */ -define(`orig_y_ub', `inline_reg0.1') -define(`transform_8x8_ub', `inline_reg0.4') - -/* - * GRF 6~11 -- reserved - */ - -/* - * GRF 12~15 -- write back for VME message - */ -define(`vme_wb', `r12') -define(`vme_wb0', `r12') -define(`vme_wb1', `r13') -define(`vme_wb2', `r14') -define(`vme_wb3', `r15') - -/* - * GRF 16 -- reserved - */ -/* - * write commit is removed on Ivybridge - */ -define(`obw_wb', `null<1>:W') -define(`obw_wb_length', `0') -/* - * GRF 18~21 -- Intra Neighbor Edge Pixels - */ -define(`INEP_ROW', `r18') -define(`INEP_COL0', `r20') -define(`INEP_COL1', `r21') - -/* - * temporary registers - */ -define(`tmp_reg0', `r32') -define(`tmp_reg1', `r33') -define(`intra_part_mask_ub', `tmp_reg1.28') -define(`mb_intra_struct_ub', `tmp_reg1.29') -define(`tmp_reg2', `r34') -define(`tmp_x_w', `tmp_reg2.0') -define(`tmp_reg3', `r35') - -/* - * Message Payload registers - */ -define(`msg_ind', `64') -define(`msg_reg0', `g64') -define(`msg_reg1', `g65') -define(`msg_reg2', `g66') -define(`msg_reg3', `g67') -define(`msg_reg4', `g68') - -/* - * VME message payload - */ -define(`vme_msg_length', `5') -define(`vme_intra_wb_length', `1') -define(`vme_inter_wb_length', `6') -define(`vme_msg_ind', `msg_ind') -define(`vme_msg_0', `msg_reg0') -define(`vme_msg_1', `msg_reg1') -define(`vme_msg_2', `msg_reg2') -define(`vme_msg_3', `msg_reg3') -define(`vme_msg_4', `msg_reg4') - - - - diff --git a/src/shaders/vme/inter_frame.asm b/src/shaders/vme/inter_frame.asm index fbcfa7d..5bab1cc 100644 --- a/src/shaders/vme/inter_frame.asm +++ b/src/shaders/vme/inter_frame.asm @@ -21,34 +21,40 @@ __INTER_START: mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; mov (16) tmp_reg2.0<1>:UD 0x0:UD 
{align1}; +mov (16) tmp_reg3.0<1>:UD 0x0:UD {align1}; -/* - * VME message - */ -/* m0 */ -mul (2) tmp_reg0.8<1>:UW orig_xy_ub<2,2,1>:UB 16:UW {align1}; /* Source = (x, y) * 16 */ +shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* Source = (x, y) * 16 */ #ifdef DEV_SNB -mul (2) tmp_reg0.0<1>:UW orig_xy_ub<2,2,1>:UB 16:UW {align1}; -add (1) tmp_reg0.0<1>:W tmp_reg0.0<2,2,1>:W -16:W {align1}; /* Reference = (x-16,y-12)-(x+32,y+24) */ -add (1) tmp_reg0.2<1>:W tmp_reg0.2<2,2,1>:W -12:W {align1}; +shl (2) vme_m0.0<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; +add (1) vme_m0.0<1>:W vme_m0.0<2,2,1>:W -16:W {align1}; /* Reference = (x-16,y-12)-(x+32,y+24) */ +add (1) vme_m0.2<1>:W vme_m0.2<2,2,1>:W -12:W {align1}; #else -mov (1) tmp_reg0.0<1>:W -16:W {align1} ; /* Reference = (x-16,y-12)-(x+32,y+24) */ -mov (1) tmp_reg0.2<1>:W -12:W {align1} ; +mov (1) vme_m0.0<1>:W -16:W {align1} ; /* Reference = (x-16,y-12)-(x+32,y+24) */ +mov (1) vme_m0.2<1>:W -12:W {align1} ; #endif -mov (1) tmp_reg0.12<1>:UD INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER:UD {align1}; /* 16x16 Source, 1/4 pixel, harr */ - +mov (1) vme_m0.12<1>:UD INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER:UD {align1}; /* 16x16 Source, 1/4 pixel, Haar */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ +mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */ + +mov (1) vme_m1.4<1>:UD MAX_NUM_MV:UD {align1}; /* Default value MAX 32 MVs */ +mov (1) vme_m1.8<1>:UD SEARCH_PATH_LEN:UD {align1}; -mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ -mov (1) tmp_reg0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */ -mov (8) vme_msg_0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1}; +mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; +shl (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x2:UD {align1}; +mov (1) obw_m0.20<1>:UB thread_id_ub
{align1}; /* dispatch id */ + +/* + * VME message + */ +/* m0 */ +__VME_LOOP: +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; /* m1 */ -mov (1) tmp_reg1.4<1>:UD MAX_NUM_MV:UD {align1}; /* Default value MAX 32 MVs */ -mov (1) tmp_reg1.8<1>:UD SEARCH_PATH_LEN:UD {align1}; - -mov (8) vme_msg_1<1>:UD tmp_reg1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; /* m2 */ mov (8) vme_msg_2<1>:UD 0x0:UD {align1}; @@ -72,27 +78,22 @@ send (8) mlen vme_msg_length rlen vme_inter_wb_length {align1}; - /* * Oword Block Write message */ -mul (1) tmp_reg3.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; -add (1) tmp_reg3.8<1>:UD tmp_reg3.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; -mul (1) tmp_reg3.8<1>:UD tmp_reg3.8<0,1,0>:UD 0x4:UD {align1}; -mov (1) tmp_reg3.20<1>:UB thread_id_ub {align1}; /* dispatch id */ -mov (8) msg_reg0.0<1>:UD tmp_reg3.0<8,8,1>:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1}; #ifdef DEV_SNB -mov (2) tmp_reg3.0<1>:UW vme_wb1.0<2,2,1>:UB {align1}; -add (1) tmp_reg3.0<1>:W tmp_reg3.0<2,2,1>:W -64:W {align1}; -add (1) tmp_reg3.2<1>:W tmp_reg3.2<2,2,1>:W -48:W {align1}; +mov (2) obw_m1.0<1>:UW vme_wb1.0<2,2,1>:UB {align1}; +add (1) obw_m1.0<1>:W obw_m1.0<2,2,1>:W -64:W {align1}; +add (1) obw_m1.2<1>:W obw_m1.2<2,2,1>:W -48:W {align1}; #else -mov (2) tmp_reg3.0<1>:UW vme_wb1.0<2,2,1>:B {align1}; +mov (2) obw_m1.0<1>:UW vme_wb1.0<2,2,1>:B {align1}; #endif -mov (8) msg_reg1.0<1>:UD tmp_reg3.0<8,8,0>:UD {align1}; +mov (8) msg_reg1.0<1>:UD obw_m1.0<8,8,0>:UD {align1}; -mov (8) msg_reg2.0<1>:UD tmp_reg3.0<8,8,0>:UD {align1}; +mov (8) msg_reg2.0<1>:UD obw_m1.0<8,8,0>:UD {align1}; /* bind index 3, write 4 oword, msg type: 8(OWord Block Write) */ send (16) @@ -110,6 +111,29 @@ send (16) mlen 3 rlen obw_wb_length {align1}; + +add (1) orig_x_ub<1>:ub orig_x_ub<0,1,0>:ub 1:uw {align1} ; +add (1) vme_m0.8<1>:UW vme_m0.8<0,1,0>:UW 16:UW {align1}; /* X += 16 */ +#ifdef DEV_SNB +add (1) vme_m0.0<1>:W 
vme_m0.0<0,1,0>:W 16:W {align1}; /* X += 16 */ +#endif + +cmp.e.f0.0 (1) null<1>:uw w_in_mb_uw<0,1,0>:uw orig_x_ub<0,1,0>:ub {align1}; +/* (0, y + 1) */ +(f0.0)mov (1) orig_x_ub<1>:ub 0:uw {align1} ; +(f0.0)mov (1) vme_m0.8<1>:uw 0:uw {align1} ; +(f0.0)add (1) vme_m0.10<1>:uw vme_m0.10<0,1,0>:uw 16:uw {align1} ; +#ifdef DEV_SNB +(f0.0)mov (1) vme_m0.0<1>:w -16:W {align1}; /* Reference = (x-16,y-12)-(x+32,y+24) */ +(f0.0)add (1) vme_m0.2<1>:w vme_m0.2<0,1,0>:w 16:w {align1}; +#endif + +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 4:UW {align1} ; /* offset += 4 */ + +add.z.f0.1 (1) num_macroblocks<1>:w num_macroblocks<0,1,0>:w -1:w {align1} ; +(-f0.1)jmpi (1) __VME_LOOP ; + +__EXIT: /* * kill thread diff --git a/src/shaders/vme/inter_frame.g6a b/src/shaders/vme/inter_frame.g6a index 6c03c9e..7568a01 100644 --- a/src/shaders/vme/inter_frame.g6a +++ b/src/shaders/vme/inter_frame.g6a @@ -1,2 +1,2 @@ -#include "gen6_vme_header.inc" +#include "vme.inc" #include "inter_frame.asm" diff --git a/src/shaders/vme/inter_frame.g6b b/src/shaders/vme/inter_frame.g6b index 45fa058..b39c58f 100644 --- a/src/shaders/vme/inter_frame.g6b +++ b/src/shaders/vme/inter_frame.g6b @@ -1,30 +1,43 @@ { 0x00800001, 0x24000061, 0x00000000, 0x00000000 }, { 0x00800001, 0x24400061, 0x00000000, 0x00000000 }, - { 0x00200041, 0x24082e29, 0x004500a0, 0x00100010 }, - { 0x00200041, 0x24002e29, 0x004500a0, 0x00100010 }, - { 0x00000040, 0x24003dad, 0x00450400, 0xfff0fff0 }, - { 0x00000040, 0x24023dad, 0x00450402, 0xfff4fff4 }, - { 0x00000001, 0x240c0061, 0x00000000, 0x7e203000 }, - { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, - { 0x00000001, 0x24160169, 0x00000000, 0x28302830 }, - { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 }, - { 0x00000001, 0x24240061, 0x00000000, 0x00000020 }, - { 0x00000001, 0x24280061, 0x00000000, 0x00003f3f }, - { 0x00600001, 0x20200022, 0x008d0420, 0x00000000 }, + { 0x00800001, 0x24600061, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, + { 
0x00200009, 0x24402e29, 0x004500a0, 0x00040004 }, + { 0x00000040, 0x24403dad, 0x00450440, 0xfff0fff0 }, + { 0x00000040, 0x24423dad, 0x00450442, 0xfff4fff4 }, + { 0x00000001, 0x244c0061, 0x00000000, 0x7e203000 }, + { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, + { 0x00000001, 0x24640061, 0x00000000, 0x00000020 }, + { 0x00000001, 0x24680061, 0x00000000, 0x00003f3f }, + { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 }, + { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 }, + { 0x00000009, 0x24880c21, 0x00000488, 0x00000002 }, + { 0x00000001, 0x24940231, 0x00000014, 0x00000000 }, + { 0x00600001, 0x20000022, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x20200022, 0x008d0460, 0x00000000 }, { 0x00600001, 0x20400062, 0x00000000, 0x00000000 }, { 0x00600001, 0x20400062, 0x00000000, 0x00000000 }, { 0x00600001, 0x20600062, 0x00000000, 0x00000000 }, { 0x08600031, 0x21801cdd, 0x00000000, 0x08482000 }, - { 0x00000041, 0x24684521, 0x000000a2, 0x000000a1 }, - { 0x00000040, 0x24684421, 0x00000468, 0x000000a0 }, - { 0x00000041, 0x24680c21, 0x00000468, 0x00000004 }, - { 0x00000001, 0x24740231, 0x00000014, 0x00000000 }, - { 0x00600001, 0x20000022, 0x008d0460, 0x00000000 }, - { 0x00200001, 0x24600229, 0x004501a0, 0x00000000 }, - { 0x00000040, 0x24603dad, 0x00450460, 0xffc0ffc0 }, - { 0x00000040, 0x24623dad, 0x00450462, 0xffd0ffd0 }, - { 0x00600001, 0x20200022, 0x008c0460, 0x00000000 }, - { 0x00600001, 0x20400022, 0x008c0460, 0x00000000 }, + { 0x00600001, 0x20000022, 0x008d0480, 0x00000000 }, + { 0x00200001, 0x24a00229, 0x004501a0, 0x00000000 }, + { 0x00000040, 0x24a03dad, 0x004504a0, 0xffc0ffc0 }, + { 0x00000040, 0x24a23dad, 0x004504a2, 0xffd0ffd0 }, + { 0x00600001, 0x20200022, 0x008c04a0, 0x00000000 }, + { 0x00600001, 0x20400022, 0x008c04a0, 0x00000000 }, { 0x05800031, 0x22001cdd, 0x00000000, 0x061b0303 }, + { 0x00000040, 0x20a02e31, 0x000000a0, 0x00010001 }, + { 0x00000040, 0x24482d29, 0x00000448, 0x00100010 }, + { 0x00000040, 
0x24403dad, 0x00000440, 0x00100010 }, + { 0x01000010, 0x20004528, 0x000000a2, 0x000000a0 }, + { 0x00010001, 0x20a00171, 0x00000000, 0x00000000 }, + { 0x00010001, 0x24480169, 0x00000000, 0x00000000 }, + { 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 }, + { 0x00010001, 0x244001ed, 0x00000000, 0xfff0fff0 }, + { 0x00010040, 0x24423dad, 0x00000442, 0x00100010 }, + { 0x00000040, 0x24882c21, 0x00000488, 0x00040004 }, + { 0x01000040, 0x20a63dad, 0x020000a6, 0xffffffff }, + { 0x00110020, 0x34001c00, 0x02001400, 0xffffffce }, { 0x00600001, 0x20000022, 0x008d0000, 0x00000000 }, { 0x07800031, 0x24001cc8, 0x00000000, 0x82000010 }, diff --git a/src/shaders/vme/inter_frame.g7a b/src/shaders/vme/inter_frame.g7a index 170e35f..7568a01 100644 --- a/src/shaders/vme/inter_frame.g7a +++ b/src/shaders/vme/inter_frame.g7a @@ -1,2 +1,2 @@ -#include "gen7_vme_header.inc" +#include "vme.inc" #include "inter_frame.asm" diff --git a/src/shaders/vme/inter_frame.g7b b/src/shaders/vme/inter_frame.g7b index 82e48d2..ee9ed81 100644 --- a/src/shaders/vme/inter_frame.g7b +++ b/src/shaders/vme/inter_frame.g7b @@ -1,27 +1,37 @@ { 0x00800001, 0x24000061, 0x00000000, 0x00000000 }, { 0x00800001, 0x24400061, 0x00000000, 0x00000000 }, - { 0x00200041, 0x24082e29, 0x004500a0, 0x00100010 }, - { 0x00000001, 0x240001ed, 0x00000000, 0xfff0fff0 }, - { 0x00000001, 0x240201ed, 0x00000000, 0xfff4fff4 }, - { 0x00000001, 0x240c0061, 0x00000000, 0x7e203000 }, - { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, - { 0x00000001, 0x24160169, 0x00000000, 0x28302830 }, - { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 }, - { 0x00000001, 0x24240061, 0x00000000, 0x00000020 }, - { 0x00000001, 0x24280061, 0x00000000, 0x00003f3f }, - { 0x00600001, 0x28200021, 0x008d0420, 0x00000000 }, + { 0x00800001, 0x24600061, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, + { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 }, + { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 }, + { 0x00000001, 0x244c0061, 
0x00000000, 0x7e203000 }, + { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x24560169, 0x00000000, 0x28302830 }, + { 0x00000001, 0x24640061, 0x00000000, 0x00000020 }, + { 0x00000001, 0x24680061, 0x00000000, 0x00003f3f }, + { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 }, + { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 }, + { 0x00000009, 0x24880c21, 0x00000488, 0x00000002 }, + { 0x00000001, 0x24940231, 0x00000014, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, { 0x00600001, 0x28400061, 0x00000000, 0x00000000 }, { 0x00600001, 0x28600061, 0x00000000, 0x00000000 }, { 0x00600001, 0x28800061, 0x00000000, 0x00000000 }, { 0x08600031, 0x21801cbd, 0x00000800, 0x0a682000 }, - { 0x00000041, 0x24684521, 0x000000a2, 0x000000a1 }, - { 0x00000040, 0x24684421, 0x00000468, 0x000000a0 }, - { 0x00000041, 0x24680c21, 0x00000468, 0x00000004 }, - { 0x00000001, 0x24740231, 0x00000014, 0x00000000 }, - { 0x00600001, 0x28000021, 0x008d0460, 0x00000000 }, - { 0x00200001, 0x246002a9, 0x004501a0, 0x00000000 }, - { 0x00600001, 0x28200021, 0x008c0460, 0x00000000 }, - { 0x00600001, 0x28400021, 0x008c0460, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, + { 0x00200001, 0x24a002a9, 0x004501a0, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008c04a0, 0x00000000 }, + { 0x00600001, 0x28400021, 0x008c04a0, 0x00000000 }, { 0x0a800031, 0x20001cac, 0x00000800, 0x060a0303 }, + { 0x00000040, 0x20a02e31, 0x000000a0, 0x00010001 }, + { 0x00000040, 0x24482d29, 0x00000448, 0x00100010 }, + { 0x01000010, 0x20004528, 0x000000a2, 0x000000a0 }, + { 0x00010001, 0x20a00171, 0x00000000, 0x00000000 }, + { 0x00010001, 0x24480169, 0x00000000, 0x00000000 }, + { 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 }, + { 0x00000040, 0x24882c21, 0x00000488, 0x00040004 }, + { 0x01000040, 0x20a63dad, 0x020000a6, 0xffffffff }, + { 0x00110020, 0x34001c00, 0x02001400, 0xffffffd8 }, { 0x00600001, 0x28000021, 0x008d0000, 
0x00000000 }, { 0x07800031, 0x24001ca8, 0x00000800, 0x82000010 }, diff --git a/src/shaders/vme/intra_frame.asm b/src/shaders/vme/intra_frame.asm index 9642c87..0ddc665 100644 --- a/src/shaders/vme/intra_frame.asm +++ b/src/shaders/vme/intra_frame.asm @@ -21,36 +21,46 @@ __INTRA_START: mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; - +mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; + +shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */ +add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */ +mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1}; +mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */ +mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1}; +mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; +mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + /* * Media Read Message -- fetch neighbor edge pixels */ /* ROW */ -mul (2) tmp_reg0.0<1>:D orig_xy_ub<2,2,1>:UB 16:UW {align1}; /* (x, y) * 16 */ -add (1) tmp_reg0.0<1>:D tmp_reg0.0<0,1,0>:D -8:W {align1}; /* X offset */ -add (1) tmp_reg0.4<1>:D tmp_reg0.4<0,1,0>:D -1:W {align1}; /* Y offset */ -mov (1) tmp_reg0.8<1>:UD BLOCK_32X1 {align1}; -mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ -mov (8) msg_reg0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1}; +__INTRA_LOOP: +mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; send 
(8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1}; /* COL */ -mul (2) tmp_reg0.0<1>:D orig_xy_ub<2,2,1>:UB 16:UW {align1}; /* (x, y) * 16 */ -add (1) tmp_reg0.0<1>:D tmp_reg0.0<0,1,0>:D -4:W {align1}; /* X offset */ -mov (1) tmp_reg0.8<1>:UD BLOCK_4X16 {align1}; -mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ -mov (8) msg_reg0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1}; +mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1}; /* * VME message */ /* m0 */ -mul (2) tmp_reg0.8<1>:UW orig_xy_ub<2,2,1>:UB 16:UW {align1}; /* (x, y) * 16 */ -mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ -mov (8) vme_msg_0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1}; +mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1}; /* m1 */ +mov (1) intra_flag<1>:UW 0x0:UW {align1} ; cmp.nz.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1}; (f0.0) mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE + LUMA_INTRA_4x4_DISABLE {align1}; @@ -68,7 +78,7 @@ add (1) tmp_x_w<1>:W w_in_mb_uw<0,1,0>:UW -tmp_x_w<0,1,0>:W {align1}; mul.nz.f0.0 (1) null<1>:UD tmp_x_w<0,1,0>:W orig_y_ub<0,1,0>:UB {align1}; /* (width - (X + 1)) * Y != 0 */ (f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_C {align1}; /* C */ -mov (8) vme_msg_1<1>:UD tmp_reg1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; /* m2 */ mov (8) vme_msg_2<1>:UD 0x0:UD {align1}; @@ -98,10 +108,7 @@ send (8) /* * Oword Block Write message */ -mul (1) tmp_reg3.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; -add (1) tmp_reg3.8<1>:UD tmp_reg3.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; -mov (1) tmp_reg3.20<1>:UB thread_id_ub {align1}; /* dispatch id */ -mov (8) msg_reg0.0<1>:UD tmp_reg3<8,8,1>:UD {align1}; +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; 
mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; @@ -124,6 +131,28 @@ send (16) rlen obw_wb_length {align1}; +add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D 16:W {align1}; /* X offset: X += 16 */ +add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D 16:W {align1}; /* X offset: X += 16 */ +add (1) vme_m0.8<1>:UW vme_m0.8<0,1,0>:UW 16:UW {align1}; /* Y = Y, X += 16 */ + +add (1) orig_x_ub<1>:ub orig_x_ub<0,1,0>:ub 1:uw {align1} ; +cmp.e.f0.0 (1) null<1>:uw w_in_mb_uw<0,1,0>:uw orig_x_ub<0,1,0>:ub {align1}; +(f0.0)mov (1) orig_x_ub<1>:ub 0:uw {align1} ; +(f0.0)add (1) orig_y_ub<1>:ub orig_y_ub<0,1,0>:ub 1:uw {align1} ; +(f0.0)mov (1) read0_header.0<1>:D -8:W {align1}; /* X offset */ +(f0.0)add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D 16:W {align1}; /* Y offset */ +(f0.0)mov (1) read1_header.0<1>:D -4:W {align1}; /* X offset */ +(f0.0)add (1) read1_header.4<1>:D read1_header.4<0,1,0>:D 16:W {align1}; /* Y offset */ +/* X = 0, Y += 16 */ +(f0.0)mov (1) vme_m0.8<1>:UW 0:UW {align1}; +(f0.0)add (1) vme_m0.10<1>:UW vme_m0.10<0,1,0>:UW 16:UW {align1}; + +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 1:uw {align1}; /* the new offset */ + +add.z.f0.0 (1) num_macroblocks<1>:w num_macroblocks<0,1,0>:w -1:w {align1} ; +(-f0.0)jmpi (1) __INTRA_LOOP ; + +__EXIT: /* * kill thread */ diff --git a/src/shaders/vme/intra_frame.g6a b/src/shaders/vme/intra_frame.g6a index 8bf33d5..10c4f98 100644 --- a/src/shaders/vme/intra_frame.g6a +++ b/src/shaders/vme/intra_frame.g6a @@ -1,3 +1,3 @@ -#include "gen6_vme_header.inc" +#include "vme.inc" #include "intra_frame.asm" diff --git a/src/shaders/vme/intra_frame.g6b b/src/shaders/vme/intra_frame.g6b index 82a3e4b..640b8b2 100644 --- a/src/shaders/vme/intra_frame.g6b +++ b/src/shaders/vme/intra_frame.g6b @@ -1,48 +1,66 @@ { 0x00800001, 0x24000061, 0x00000000, 0x00000000 }, { 0x00800001, 0x24400061, 0x00000000, 0x00000000 }, - { 0x00200041, 0x24002e25, 0x004500a0, 0x00100010 }, + { 0x00800001, 0x24800061, 0x00000000, 0x00000000 }, + { 
0x00200009, 0x24002e25, 0x004500a0, 0x00040004 }, { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 }, { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff }, { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f }, { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 }, + { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc }, + { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 }, + { 0x00000001, 0x24340231, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, + { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, + { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 }, + { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 }, + { 0x00000001, 0x24940231, 0x00000014, 0x00000000 }, { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 }, { 0x04600031, 0x22401cd1, 0x00000000, 0x02188004 }, - { 0x00200041, 0x24002e25, 0x004500a0, 0x00100010 }, - { 0x00000040, 0x24003ca5, 0x00000400, 0xfffcfffc }, - { 0x00000001, 0x240800e1, 0x00000000, 0x000f0003 }, - { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, - { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 }, + { 0x00600001, 0x20000022, 0x008d0420, 0x00000000 }, { 0x04600031, 0x22801cd1, 0x00000000, 0x02288004 }, - { 0x00200041, 0x24082e29, 0x004500a0, 0x00100010 }, - { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, - { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 }, + { 0x00600001, 0x20000022, 0x008d0440, 0x00000000 }, + { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 }, { 0x02000010, 0x20002e28, 0x000000a4, 0x00010001 }, - { 0x00010001, 0x243c00f1, 0x00000000, 0x00000006 }, + { 0x00010001, 0x247c00f1, 0x00000000, 0x00000006 }, { 0x02000010, 0x20002e28, 0x000000a0, 0x00000000 }, - { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000060 }, + { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000060 }, { 0x02000010, 0x20002e28, 0x000000a1, 0x00000000 }, - { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000010 }, + { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000010 }, { 0x02000041, 0x20004628, 
0x000000a0, 0x000000a1 }, - { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000004 }, - { 0x00000040, 0x24402e2d, 0x000000a0, 0x00010001 }, - { 0x00000040, 0x2440352d, 0x000000a2, 0x00004440 }, - { 0x02000041, 0x200045a0, 0x00000440, 0x000000a1 }, - { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000008 }, - { 0x00600001, 0x20200022, 0x008d0420, 0x00000000 }, + { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000004 }, + { 0x00000040, 0x24c02e2d, 0x000000a0, 0x00010001 }, + { 0x00000040, 0x24c0352d, 0x000000a2, 0x000044c0 }, + { 0x02000041, 0x200045a0, 0x000004c0, 0x000000a1 }, + { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000008 }, + { 0x00600001, 0x20200022, 0x008d0460, 0x00000000 }, { 0x00600001, 0x20400062, 0x00000000, 0x00000000 }, { 0x00600001, 0x20400022, 0x008d0240, 0x00000000 }, { 0x00600001, 0x206000e2, 0x00000000, 0x00000000 }, { 0x00800001, 0x20600232, 0x00cf0283, 0x00000000 }, { 0x00000001, 0x20700062, 0x00000000, 0x11111111 }, { 0x08600031, 0x21801cdd, 0x00000000, 0x08184000 }, - { 0x00000041, 0x24684521, 0x000000a2, 0x000000a1 }, - { 0x00000040, 0x24684421, 0x00000468, 0x000000a0 }, - { 0x00000001, 0x24740231, 0x00000014, 0x00000000 }, - { 0x00600001, 0x20000022, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x20000022, 0x008d0480, 0x00000000 }, { 0x00000001, 0x20200022, 0x00000180, 0x00000000 }, { 0x00000001, 0x20240022, 0x00000190, 0x00000000 }, { 0x00000001, 0x20280022, 0x00000194, 0x00000000 }, { 0x00000001, 0x202c0022, 0x00000198, 0x00000000 }, { 0x05800031, 0x22001cdd, 0x00000000, 0x041b0003 }, + { 0x00000040, 0x24003ca5, 0x00000400, 0x00100010 }, + { 0x00000040, 0x24203ca5, 0x00000420, 0x00100010 }, + { 0x00000040, 0x24482d29, 0x00000448, 0x00100010 }, + { 0x00000040, 0x20a02e31, 0x000000a0, 0x00010001 }, + { 0x01000010, 0x20004528, 0x000000a2, 0x000000a0 }, + { 0x00010001, 0x20a00171, 0x00000000, 0x00000000 }, + { 0x00010040, 0x20a12e31, 0x000000a1, 0x00010001 }, + { 0x00010001, 0x240001e5, 0x00000000, 0xfff8fff8 }, + { 0x00010040, 0x24043ca5, 0x00000404, 0x00100010 
}, + { 0x00010001, 0x242001e5, 0x00000000, 0xfffcfffc }, + { 0x00010040, 0x24243ca5, 0x00000424, 0x00100010 }, + { 0x00010001, 0x24480169, 0x00000000, 0x00000000 }, + { 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 }, + { 0x00000040, 0x24882c21, 0x00000488, 0x00010001 }, + { 0x01000040, 0x20a63dad, 0x000000a6, 0xffffffff }, + { 0x00110020, 0x34001c00, 0x00001400, 0xffffffa2 }, { 0x00600001, 0x20000022, 0x008d0000, 0x00000000 }, { 0x07800031, 0x24001cc8, 0x00000000, 0x82000010 }, diff --git a/src/shaders/vme/intra_frame.g7a b/src/shaders/vme/intra_frame.g7a index b4f8936..10c4f98 100644 --- a/src/shaders/vme/intra_frame.g7a +++ b/src/shaders/vme/intra_frame.g7a @@ -1,3 +1,3 @@ -#include "gen7_vme_header.inc" +#include "vme.inc" #include "intra_frame.asm" diff --git a/src/shaders/vme/intra_frame.g7b b/src/shaders/vme/intra_frame.g7b index 16ee9af..693113d 100644 --- a/src/shaders/vme/intra_frame.g7b +++ b/src/shaders/vme/intra_frame.g7b @@ -1,48 +1,66 @@ { 0x00800001, 0x24000061, 0x00000000, 0x00000000 }, { 0x00800001, 0x24400061, 0x00000000, 0x00000000 }, - { 0x00200041, 0x24002e25, 0x004500a0, 0x00100010 }, + { 0x00800001, 0x24800061, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 }, { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 }, { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff }, { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f }, { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 }, + { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc }, + { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 }, + { 0x00000001, 0x24340231, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 }, + { 0x00000001, 0x24540231, 0x00000014, 0x00000000 }, + { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 }, + { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 }, + { 0x00000001, 0x24940231, 0x00000014, 0x00000000 }, { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 }, { 0x04600031, 
0x22401cb1, 0x00000800, 0x02190004 }, - { 0x00200041, 0x24002e25, 0x004500a0, 0x00100010 }, - { 0x00000040, 0x24003ca5, 0x00000400, 0xfffcfffc }, - { 0x00000001, 0x240800e1, 0x00000000, 0x000f0003 }, - { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, - { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 }, { 0x04600031, 0x22801cb1, 0x00000800, 0x02290004 }, - { 0x00200041, 0x24082e29, 0x004500a0, 0x00100010 }, - { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, - { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 }, + { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 }, { 0x02000010, 0x20002e28, 0x000000a4, 0x00010001 }, - { 0x00010001, 0x243c00f1, 0x00000000, 0x00000006 }, + { 0x00010001, 0x247c00f1, 0x00000000, 0x00000006 }, { 0x02000010, 0x20002e28, 0x000000a0, 0x00000000 }, - { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000060 }, + { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000060 }, { 0x02000010, 0x20002e28, 0x000000a1, 0x00000000 }, - { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000010 }, + { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000010 }, { 0x02000041, 0x20004628, 0x000000a0, 0x000000a1 }, - { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000004 }, - { 0x00000040, 0x24402e2d, 0x000000a0, 0x00010001 }, - { 0x00000040, 0x2440352d, 0x000000a2, 0x00004440 }, - { 0x02000041, 0x200045a0, 0x00000440, 0x000000a1 }, - { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000008 }, - { 0x00600001, 0x28200021, 0x008d0420, 0x00000000 }, + { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000004 }, + { 0x00000040, 0x24c02e2d, 0x000000a0, 0x00010001 }, + { 0x00000040, 0x24c0352d, 0x000000a2, 0x000044c0 }, + { 0x02000041, 0x200045a0, 0x000004c0, 0x000000a1 }, + { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000008 }, + { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, { 0x00600001, 0x28400061, 0x00000000, 0x00000000 }, { 0x00600001, 0x28600021, 0x008d0240, 0x00000000 }, { 0x00600001, 0x288000e1, 
0x00000000, 0x00000000 }, { 0x00800001, 0x28800231, 0x00cf0283, 0x00000000 }, { 0x00000001, 0x28900061, 0x00000000, 0x11111111 }, { 0x08600031, 0x21801cbd, 0x00000800, 0x0a184000 }, - { 0x00000041, 0x24684521, 0x000000a2, 0x000000a1 }, - { 0x00000040, 0x24684421, 0x00000468, 0x000000a0 }, - { 0x00000001, 0x24740231, 0x00000014, 0x00000000 }, - { 0x00600001, 0x28000021, 0x008d0460, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, { 0x00000001, 0x28200021, 0x00000180, 0x00000000 }, { 0x00000001, 0x28240021, 0x00000190, 0x00000000 }, { 0x00000001, 0x28280021, 0x00000194, 0x00000000 }, { 0x00000001, 0x282c0021, 0x00000198, 0x00000000 }, { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0003 }, + { 0x00000040, 0x24003ca5, 0x00000400, 0x00100010 }, + { 0x00000040, 0x24203ca5, 0x00000420, 0x00100010 }, + { 0x00000040, 0x24482d29, 0x00000448, 0x00100010 }, + { 0x00000040, 0x20a02e31, 0x000000a0, 0x00010001 }, + { 0x01000010, 0x20004528, 0x000000a2, 0x000000a0 }, + { 0x00010001, 0x20a00171, 0x00000000, 0x00000000 }, + { 0x00010040, 0x20a12e31, 0x000000a1, 0x00010001 }, + { 0x00010001, 0x240001e5, 0x00000000, 0xfff8fff8 }, + { 0x00010040, 0x24043ca5, 0x00000404, 0x00100010 }, + { 0x00010001, 0x242001e5, 0x00000000, 0xfffcfffc }, + { 0x00010040, 0x24243ca5, 0x00000424, 0x00100010 }, + { 0x00010001, 0x24480169, 0x00000000, 0x00000000 }, + { 0x00010040, 0x244a2d29, 0x0000044a, 0x00100010 }, + { 0x00000040, 0x24882c21, 0x00000488, 0x00010001 }, + { 0x01000040, 0x20a63dad, 0x000000a6, 0xffffffff }, + { 0x00110020, 0x34001c00, 0x00001400, 0xffffffa2 }, { 0x00600001, 0x28000021, 0x008d0000, 0x00000000 }, { 0x07800031, 0x24001ca8, 0x00000800, 0x82000010 }, diff --git a/src/shaders/vme/gen6_vme_header.inc b/src/shaders/vme/vme.inc similarity index 75% rename from src/shaders/vme/gen6_vme_header.inc rename to src/shaders/vme/vme.inc index e689ef6..44fe320 100644 --- a/src/shaders/vme/gen6_vme_header.inc +++ b/src/shaders/vme/vme.inc @@ -56,8 +56,16 @@ 
define(`SEARCH_PATH_LEN', `0x00003F3F') define(`INTRA_PREDICTORE_MODE', `0x11111111:UD') +#ifdef DEV_SNB + define(`OBW_CACHE_TYPE', `5') +#else + +define(`OBW_CACHE_TYPE', `10') + +#endif + define(`OBW_MESSAGE_TYPE', `8') define(`OBW_BIND_IDX', `BIND_IDX_OUTPUT') @@ -67,8 +75,17 @@ define(`OBW_CONTROL_1', `1') /* 1 OWord, high 128 bits */ define(`OBW_CONTROL_2', `2') /* 2 OWords */ define(`OBW_CONTROL_3', `3') /* 4 OWords */ +#ifdef DEV_SNB + define(`OBW_WRITE_COMMIT_CATEGORY', `1') /* write commit on Sandybrige */ +#else + +define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */ + +#endif + + define(`OBW_HEADER_PRESENT', `1') /* GRF registers @@ -97,6 +114,7 @@ define(`orig_xy_ub', `inline_reg0.0') define(`orig_x_ub', `inline_reg0.0') /* in macroblock */ define(`orig_y_ub', `inline_reg0.1') define(`transform_8x8_ub', `inline_reg0.4') +define(`num_macroblocks', `inline_reg0.6') /* * GRF 6~11 -- reserved @@ -110,13 +128,24 @@ define(`vme_wb0', `r12') define(`vme_wb1', `r13') define(`vme_wb2', `r14') define(`vme_wb3', `r15') - + +#ifdef DEV_SNB /* * GRF 16 -- write back for Oword Block Write message with write commit bit */ define(`obw_wb', `r16') define(`obw_wb_length', `1') +#else + +/* + * GRF 16 -- reserved + */ +define(`obw_wb', `null<1>:W') +define(`obw_wb_length', `0') + +#endif + /* * GRF 18~21 -- Intra Neighbor Edge Pixels */ @@ -128,16 +157,28 @@ define(`INEP_COL1', `r21') * temporary registers */ define(`tmp_reg0', `r32') +define(`read0_header', `tmp_reg0') define(`tmp_reg1', `r33') -define(`intra_part_mask_ub', `tmp_reg1.28') -define(`mb_intra_struct_ub', `tmp_reg1.29') +define(`read1_header', `tmp_reg1') define(`tmp_reg2', `r34') -define(`tmp_x_w', `tmp_reg2.0') -define(`tmp_reg3', `r35') +define(`vme_m0', `tmp_reg2') +define(`tmp_reg3', `r35') +define(`vme_m1', `tmp_reg3') +define(`intra_flag', `vme_m1.28') +define(`intra_part_mask_ub', `vme_m1.28') +define(`mb_intra_struct_ub', `vme_m1.29') +define(`tmp_reg4', `r36') +define(`obw_m0', 
`tmp_reg4') +define(`tmp_reg5', `r37') +define(`obw_m1', `tmp_reg5') +define(`tmp_reg6', `r38') +define(`tmp_x_w', `tmp_reg6.0') /* * MRF registers */ +#ifdef DEV_SNB + define(`msg_ind', `0') define(`msg_reg0', `m0') /* m0 */ define(`msg_reg1', `m1') /* m1 */ @@ -145,17 +186,49 @@ define(`msg_reg2', `m2') /* m2 */ define(`msg_reg3', `m3') /* m3 */ define(`msg_reg4', `m4') /* m4 */ +#else + +define(`msg_ind', `64') +define(`msg_reg0', `g64') +define(`msg_reg1', `g65') +define(`msg_reg2', `g66') +define(`msg_reg3', `g67') +define(`msg_reg4', `g68') + +#endif + /* * VME message payload */ + +#ifdef DEV_SNB + define(`vme_msg_length', `4') -define(`vme_intra_wb_length', `1') define(`vme_inter_wb_length', `4') + +#else + +define(`vme_msg_length', `5') +define(`vme_inter_wb_length', `6') + +#endif + +define(`vme_intra_wb_length', `1') + define(`vme_msg_ind', `msg_ind') define(`vme_msg_0', `msg_reg0') define(`vme_msg_1', `msg_reg1') define(`vme_msg_2', `msg_reg2') + +#ifdef DEV_SNB + define(`vme_msg_3', `vme_msg_2') define(`vme_msg_4', `msg_reg3') +#else + +define(`vme_msg_3', `msg_reg3') +define(`vme_msg_4', `msg_reg4') + +#endif -- 2.7.4