Now the mb_intra_struct is calculated by the GPU thread.
But when handling the logic for multi-slice, the GPU calculation will
become complex.So pass the intra struct flag of macroblock by using
thread payload.
Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
int i, s;
unsigned int *command_ptr;
+#define INTRA_PRED_AVAIL_FLAG_AE 0x60
+#define INTRA_PRED_AVAIL_FLAG_B 0x10
+#define INTRA_PRED_AVAIL_FLAG_C 0x8
+#define INTRA_PRED_AVAIL_FLAG_D 0x4
+#define INTRA_PRED_AVAIL_FLAG_BCD_MASK 0x1C
+
dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
command_ptr = vme_context->vme_batchbuffer.bo->virtual;
VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
int slice_mb_begin = pSliceParameter->macroblock_address;
int slice_mb_number = pSliceParameter->num_macroblocks;
-
+ unsigned int mb_intra_ub;
for (i = 0; i < slice_mb_number; ) {
int mb_count = i + slice_mb_begin;
mb_x = mb_count % mb_width;
mb_y = mb_count / mb_width;
-
+ mb_intra_ub = 0;
+ if (mb_x != 0) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+ }
+ if (mb_y != 0) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+ if (mb_x != 0)
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+ if (mb_x != (mb_width -1))
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+ }
+ if (i == 0)
+ mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
+
*command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
*command_ptr++ = kernel;
*command_ptr++ = 0;
/*inline data */
*command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
- *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | ((i==0) << 1));
+ *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
i += 1;
}
and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1};
(f0.0) mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE {align1};
-cmp.nz.f0.0 (1) null<1>:UW orig_x_ub<0,1,0>:UB 0:UW {align1}; /* X != 0 */
-(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_AE {align1}; /* A */
-
-cmp.nz.f0.0 (1) null<1>:UW orig_y_ub<0,1,0>:UB 0:UW {align1}; /* Y != 0 */
-(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_B {align1}; /* B */
-
-mul.nz.f0.0 (1) null<1>:UW orig_x_ub<0,1,0>:UB orig_y_ub<0,1,0>:UB {align1}; /* X * Y != 0 */
-(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_D {align1}; /* D */
-
-add (1) tmp_x_w<1>:W orig_x_ub<0,1,0>:UB 1:UW {align1}; /* X + 1 */
-add (1) tmp_x_w<1>:W w_in_mb_uw<0,1,0>:UW -tmp_x_w<0,1,0>:W {align1}; /* width - (X + 1) */
-mul.nz.f0.0 (1) null<1>:UD tmp_x_w<0,1,0>:W orig_y_ub<0,1,0>:UB {align1}; /* (width - (X + 1)) * Y != 0 */
-(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_C {align1}; /* C */
-
-and.nz.f0.0 (1) null<1>:UW slice_edge_ub<0,1,0>:UB 2:UW {align1};
-(f0.0) and (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB 0xE0 {align1}; /* slice edge disable B,C,D*/
+/* assign MB intra struct from the thread payload*/
+mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<1>:UB {align1};
/* Disable DC HAAR component when calculating HARR SATD block */
mov (1) tmp_reg0.0<1>:UW DC_HARR_DISABLE:UW {align1};
{ 0x00000001, 0x247c0169, 0x00000000, 0x00000000 },
{ 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 },
{ 0x00010001, 0x247c00f1, 0x00000000, 0x00000002 },
- { 0x02000010, 0x20002e28, 0x000000a0, 0x00000000 },
- { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000060 },
- { 0x02000010, 0x20002e28, 0x000000a1, 0x00000000 },
- { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000010 },
- { 0x02000041, 0x20004628, 0x000000a0, 0x000000a1 },
- { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000004 },
- { 0x00000040, 0x25202e2d, 0x000000a0, 0x00010001 },
- { 0x00000040, 0x2520352d, 0x000000a2, 0x00004520 },
- { 0x02000041, 0x200045a0, 0x00000520, 0x000000a1 },
- { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000008 },
- { 0x02000005, 0x20002e28, 0x000000a4, 0x00020002 },
- { 0x00010005, 0x247d1e31, 0x0000047d, 0x000000e0 },
+ { 0x00000001, 0x247d0231, 0x002000a5, 0x00000000 },
{ 0x00000001, 0x24000169, 0x00000000, 0x00200020 },
{ 0x00000001, 0x247e0231, 0x00000400, 0x00000000 },
{ 0x00000001, 0x244c0061, 0x00000000, 0x00800000 },
and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1};
(f0.0) mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE {align1};
-cmp.nz.f0.0 (1) null<1>:UW orig_x_ub<0,1,0>:UB 0:UW {align1}; /* X != 0 */
-(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_AE {align1}; /* A */
-
-cmp.nz.f0.0 (1) null<1>:UW orig_y_ub<0,1,0>:UB 0:UW {align1}; /* Y != 0 */
-(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_B {align1}; /* B */
-
-mul.nz.f0.0 (1) null<1>:UW orig_x_ub<0,1,0>:UB orig_y_ub<0,1,0>:UB {align1}; /* X * Y != 0 */
-(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_D {align1}; /* D */
-
-add (1) tmp_x_w<1>:W orig_x_ub<0,1,0>:UB 1:UW {align1}; /* X + 1 */
-add (1) tmp_x_w<1>:W w_in_mb_uw<0,1,0>:UW -tmp_x_w<0,1,0>:W {align1}; /* width - (X + 1) */
-mul.nz.f0.0 (1) null<1>:UD tmp_x_w<0,1,0>:W orig_y_ub<0,1,0>:UB {align1}; /* (width - (X + 1)) * Y != 0 */
-(f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_C {align1}; /* C */
-
-and.nz.f0.0 (1) null<1>:UW slice_edge_ub<0,1,0>:UB 2:UW {align1};
-(f0.0) and (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB 0xE0 {align1}; /* slice edge disable B,C,D*/
-
+/* assign MB intra struct from the thread payload*/
+mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<1>:UB {align1};
+
/* Disable DC HAAR component when calculating HARR SATD block */
mov (1) tmp_reg0.0<1>:UW DC_HARR_DISABLE:UW {align1};
mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
{ 0x00000001, 0x247c0169, 0x00000000, 0x00000000 },
{ 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 },
{ 0x00010001, 0x247c00f1, 0x00000000, 0x00000002 },
- { 0x02000010, 0x20002e28, 0x000000a0, 0x00000000 },
- { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000060 },
- { 0x02000010, 0x20002e28, 0x000000a1, 0x00000000 },
- { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000010 },
- { 0x02000041, 0x20004628, 0x000000a0, 0x000000a1 },
- { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000004 },
- { 0x00000040, 0x25202e2d, 0x000000a0, 0x00010001 },
- { 0x00000040, 0x2520352d, 0x000000a2, 0x00004520 },
- { 0x02000041, 0x200045a0, 0x00000520, 0x000000a1 },
- { 0x00010040, 0x247d1e31, 0x0000047d, 0x00000008 },
- { 0x02000005, 0x20002e28, 0x000000a4, 0x00020002 },
- { 0x00010005, 0x247d1e31, 0x0000047d, 0x000000e0 },
+ { 0x00000001, 0x247d0231, 0x002000a5, 0x00000000 },
{ 0x00000001, 0x24000169, 0x00000000, 0x00200020 },
{ 0x00000001, 0x247e0231, 0x00000400, 0x00000000 },
{ 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
define(`orig_x_ub', `inline_reg0.0') /* in macroblock */
define(`orig_y_ub', `inline_reg0.1')
define(`transform_8x8_ub', `inline_reg0.4')
-define(`slice_edge_ub', `inline_reg0.4')
+define(`input_mb_intra_ub', `inline_reg0.5')
define(`num_macroblocks', `inline_reg0.6')
/*