#include "gen6_vme.h"
#include "intel_media.h"
-#define MFC_SOFTWARE_HASWELL 1
+#define AVC_INTRA_RDO_OFFSET 4
+#define AVC_INTER_RDO_OFFSET 10
+#define AVC_INTER_MSG_OFFSET 8
+#define AVC_INTER_MV_OFFSET 48
+#define AVC_RDO_MASK 0xFFFF
+
+#define MFC_SOFTWARE_HASWELL 0
#define B0_STEP_REV 2
#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
-static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
-#include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
-};
-
-static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
-#include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
+static const uint32_t gen75_mfc_batchbuffer_avc[][4] = {
+#include "shaders/utils/mfc_batchbuffer_hsw.g75b"
};
static struct i965_kernel gen75_mfc_kernels[] = {
{
"MFC AVC INTRA BATCHBUFFER ",
MFC_BATCHBUFFER_AVC_INTRA,
- gen75_mfc_batchbuffer_avc_intra,
- sizeof(gen75_mfc_batchbuffer_avc_intra),
- NULL
- },
-
- {
- "MFC AVC INTER BATCHBUFFER ",
- MFC_BATCHBUFFER_AVC_INTER,
- gen75_mfc_batchbuffer_avc_inter,
- sizeof(gen75_mfc_batchbuffer_avc_inter),
+ gen75_mfc_batchbuffer_avc,
+ sizeof(gen75_mfc_batchbuffer_avc),
NULL
},
};
}
-#ifdef MFC_SOFTWARE_HASWELL
+#if MFC_SOFTWARE_HASWELL
static int
gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
return len_in_dwords;
}
-#define AVC_INTRA_RDO_OFFSET 4
-#define AVC_INTER_RDO_OFFSET 10
-#define AVC_INTER_MSG_OFFSET 8
-#define AVC_INTER_MV_OFFSET 48
-#define AVC_RDO_MASK 0xFFFF
-
static void
gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
struct encode_state *encode_state,
&vme_context->vme_output,
BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
- assert(mfc_context->aux_batchbuffer_surface.bo);
- mfc_context->buffer_suface_setup(ctx,
- &mfc_context->gpe_context,
- &mfc_context->aux_batchbuffer_surface,
- BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
- SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
}
static void
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
- VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
- int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
- int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
- mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
- mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
- mfc_context->mfc_batchbuffer_surface.pitch = 16;
- mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr,
- "MFC batchbuffer",
- mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
- 0x1000);
+ assert(mfc_context->aux_batchbuffer_surface.bo);
mfc_context->buffer_suface_setup(ctx,
&mfc_context->gpe_context,
- &mfc_context->mfc_batchbuffer_surface,
+ &mfc_context->aux_batchbuffer_surface,
BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
}
(void)mfc_context;
}
+#define AVC_PAK_LEN_IN_BYTE 48
+#define AVC_PAK_LEN_IN_OWORD 3
+
static void
gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
- int index,
+ uint32_t intra_flag, /* caller passes 1 for an I slice, 0 otherwise */
int head_offset,
- int batchbuffer_offset,
- int head_size,
- int tail_size,
int number_mb_cmds,
- int first_object,
- int last_object,
- int last_slice,
+ int slice_end_x, /* x position (in macroblocks) of the last macroblock of the slice */
+ int slice_end_y, /* y position (in macroblocks) of the last macroblock of the slice */
int mb_x,
int mb_y,
int width_in_mbs,
- int qp)
+ int qp,
+ uint32_t fwd_ref, /* caller passes vme_context->ref_index_in_mb[0], or 0 for an I slice */
+ uint32_t bwd_ref) /* caller passes vme_context->ref_index_in_mb[1], or 0 for an I slice */
{
- BEGIN_BATCH(batch, 12);
+ uint32_t temp_value;
+ BEGIN_BATCH(batch, 14); /* 14 dwords in total: 6 header dwords, then 8 dwords after the inline-data marker */
- OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
- OUT_BATCH(batch, index);
+ OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
+ OUT_BATCH(batch, 0);
OUT_BATCH(batch, 0);
OUT_BATCH(batch, 0);
OUT_BATCH(batch, 0);
OUT_BATCH(batch, 0);
/*inline data */
- OUT_BATCH(batch, head_offset);
- OUT_BATCH(batch, batchbuffer_offset);
- OUT_BATCH(batch,
- head_size << 16 |
- tail_size);
- OUT_BATCH(batch,
- number_mb_cmds << 16 |
- first_object << 2 |
- last_object << 1 |
- last_slice);
- OUT_BATCH(batch,
- mb_y << 8 |
- mb_x);
+ OUT_BATCH(batch, head_offset / 16); /* inline dword 0: kernel's buffer_offset, in Owords (16 bytes each) */
+ OUT_BATCH(batch, (intra_flag) | (qp << 16)); /* inline dword 1: kernel's mb_flag (byte 0) and qp_flag (byte 2) */
+ temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16)); /* NOTE(review): mb_x and mb_y get 8 bits each; no range check is done here */
+ OUT_BATCH(batch, temp_value); /* inline dword 2: kernel's mb_x, mb_y and width_in_mbs */
+
+ OUT_BATCH(batch, number_mb_cmds); /* inline dword 3: kernel's total_mbs, used as its loop end count */
+
OUT_BATCH(batch,
- qp << 16 |
- width_in_mbs);
+ ((slice_end_y << 8) | (slice_end_x))); /* inline dword 4: kernel's slice_end_x (byte 0) and slice_end_y (byte 1) */
+ OUT_BATCH(batch, fwd_ref); /* inline dword 5: kernel's fwd_ref */
+ OUT_BATCH(batch, bwd_ref); /* inline dword 6: kernel's bwd_ref */
+
+ OUT_BATCH(batch, MI_NOOP); /* last dword; presumably padding, the kernel defines no field here -- TODO confirm */
ADVANCE_BATCH(batch);
}
struct intel_encoder_context *encoder_context,
VAEncSliceParameterBufferH264 *slice_param,
int head_offset,
- unsigned short head_size,
- unsigned short tail_size,
- int batchbuffer_offset,
int qp,
int last_slice)
{
struct intel_batchbuffer *batch = encoder_context->base.batch;
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
int total_mbs = slice_param->num_macroblocks;
+ int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
int number_mb_cmds = 128;
- int starting_mb = 0;
- int last_object = 0;
- int first_object = 1;
+ int starting_offset = 0;
int i;
int mb_x, mb_y;
- int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
-
- for (i = 0; i < total_mbs / number_mb_cmds; i++) {
- last_object = (total_mbs - starting_mb) == number_mb_cmds;
- mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
- mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
- assert(mb_x <= 255 && mb_y <= 255);
-
- starting_mb += number_mb_cmds;
-
- gen75_mfc_batchbuffer_emit_object_command(batch,
- index,
- head_offset,
- batchbuffer_offset,
- head_size,
- tail_size,
- number_mb_cmds,
- first_object,
- last_object,
- last_slice,
- mb_x,
- mb_y,
- width_in_mbs,
- qp);
-
- if (first_object) {
- head_offset += head_size;
- batchbuffer_offset += head_size;
- }
+ int last_mb, slice_end_x, slice_end_y;
+ int remaining_mb = total_mbs;
+ uint32_t fwd_ref , bwd_ref, mb_flag;
- if (last_object) {
- head_offset += tail_size;
- batchbuffer_offset += tail_size;
- }
+ last_mb = slice_param->macroblock_address + total_mbs - 1;
+ slice_end_x = last_mb % width_in_mbs;
+ slice_end_y = last_mb / width_in_mbs;
- batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
+ if (slice_type == SLICE_TYPE_I) {
+ fwd_ref = 0;
+ bwd_ref = 0;
+ mb_flag = 1;
+ } else {
+ fwd_ref = vme_context->ref_index_in_mb[0];
+ bwd_ref = vme_context->ref_index_in_mb[1];
+ mb_flag = 0;
+ }
- first_object = 0;
+ if (width_in_mbs >= 100) {
+ number_mb_cmds = width_in_mbs / 5;
+ } else if (width_in_mbs >= 80) {
+ number_mb_cmds = width_in_mbs / 4;
+ } else if (width_in_mbs >= 60) {
+ number_mb_cmds = width_in_mbs / 3;
+ } else if (width_in_mbs >= 40) {
+ number_mb_cmds = width_in_mbs / 2;
+ } else {
+ number_mb_cmds = width_in_mbs;
}
- if (!last_object) {
- last_object = 1;
- number_mb_cmds = total_mbs % number_mb_cmds;
- mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
- mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
- assert(mb_x <= 255 && mb_y <= 255);
- starting_mb += number_mb_cmds;
+ do {
+ if (number_mb_cmds >= remaining_mb) {
+ number_mb_cmds = remaining_mb;
+ }
+ mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs;
+ mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs;
gen75_mfc_batchbuffer_emit_object_command(batch,
- index,
+ mb_flag,
head_offset,
- batchbuffer_offset,
- head_size,
- tail_size,
number_mb_cmds,
- first_object,
- last_object,
- last_slice,
+ slice_end_x,
+ slice_end_y,
mb_x,
mb_y,
width_in_mbs,
- qp);
- }
+ qp,
+ fwd_ref,
+ bwd_ref);
+
+ head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE);
+ remaining_mb -= number_mb_cmds;
+ starting_offset += number_mb_cmds;
+ } while (remaining_mb > 0);
}
/*
- * return size in Owords (16bytes)
+ * Emit the commands of one slice into the aux batchbuffer (no return value)
*/
-static int
+static void
gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context,
- int slice_index,
- int batchbuffer_offset)
+ int slice_index)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
int slice_header_length_in_bits = 0;
unsigned int tail_data[] = { 0x0, 0x0 };
long head_offset;
- int old_used = intel_batchbuffer_used_size(slice_batch), used;
- unsigned short head_size, tail_size;
int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
if (rate_control_mode == VA_RC_CBR) {
assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
assert(qp >= 0 && qp < 52);
- head_offset = old_used / 16;
gen75_mfc_avc_slice_state(ctx,
pPicParameter,
pSliceParameter,
free(slice_header);
intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
- used = intel_batchbuffer_used_size(slice_batch);
- head_size = (used - old_used) / 16;
- old_used = used;
+ head_offset = intel_batchbuffer_used_size(slice_batch);
+
+ slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE;
+
+ gen75_mfc_avc_batchbuffer_slice_command(ctx,
+ encoder_context,
+ pSliceParameter,
+ head_offset,
+ qp,
+ last_slice);
+
- /* tail */
+ /* Aligned for tail */
+ intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
if (last_slice) {
mfc_context->insert_object(ctx,
encoder_context,
slice_batch);
}
- intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
- used = intel_batchbuffer_used_size(slice_batch);
- tail_size = (used - old_used) / 16;
-
-
- gen75_mfc_avc_batchbuffer_slice_command(ctx,
- encoder_context,
- pSliceParameter,
- head_offset,
- head_size,
- tail_size,
- batchbuffer_offset,
- qp,
- last_slice);
-
- return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
+ return;
}
static void
gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
- size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
- offset += size;
+ gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i);
+ }
+ {
+ struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
+ intel_batchbuffer_align(slice_batch, 8);
+ BEGIN_BCS_BATCH(slice_batch, 2);
+ OUT_BCS_BATCH(slice_batch, 0);
+ OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
+ ADVANCE_BCS_BATCH(slice_batch);
}
-
intel_batchbuffer_end_atomic(batch);
intel_batchbuffer_flush(batch);
}
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
- dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
+ dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
- return mfc_context->mfc_batchbuffer_surface.bo;
+ return mfc_context->aux_batchbuffer_surface.bo;
}
#endif
return;
}
-#ifdef MFC_SOFTWARE_HASWELL
+#if MFC_SOFTWARE_HASWELL
slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
#else
slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
i965_gpe_load_kernels(ctx,
&mfc_context->gpe_context,
gen75_mfc_kernels,
- NUM_MFC_KERNEL);
+ 1);
mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
mfc_context->set_surface_state = gen75_mfc_surface_state;
mfc_batchbuffer_avc_intra.asm \
mfc_batchbuffer_avc_inter.asm
+MFC_CORE_HSW = \
+ mfc_batchbuffer_hsw.asm
+
INTEL_G6B = mfc_batchbuffer_avc_intra.g6b mfc_batchbuffer_avc_inter.g6b
INTEL_G6A = mfc_batchbuffer_avc_intra.g6a mfc_batchbuffer_avc_inter.g6a
INTEL_GEN6_INC = mfc_batchbuffer.inc
INTEL_GEN7_INC = mfc_batchbuffer.inc
INTEL_GEN7_ASM = $(INTEL_G7A:%.g7a=%.gen7.asm)
+INTEL_G75B = mfc_batchbuffer_hsw.g75b
+INTEL_G75A = mfc_batchbuffer_hsw.g75a
+INTEL_GEN75_INC = mfc_batchbuffer_hsw.inc
+INTEL_GEN75_ASM = $(INTEL_G75A:%.g75a=%.gen75.asm)
+
TARGETS =
if HAVE_GEN4ASM
TARGETS += $(INTEL_G6B)
TARGETS += $(INTEL_G7B)
+TARGETS += $(INTEL_G75B)
endif
all-local: $(TARGETS)
-SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm
+SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm .g75a .g75b .gen75.asm
if HAVE_GEN4ASM
$(INTEL_GEN6_ASM): $(MFC_CORE) $(MFC_CORE_AVC) $(INTEL_GEN6_INC)
rm _mfc0.$@
.gen7.asm.g7b:
$(AM_V_GEN)$(GEN4ASM) -g 7 -o $@ $<
+
+$(INTEL_GEN75_ASM): $(MFC_CORE_HSW) $(INTEL_GEN75_INC)
+.g75a.gen75.asm:
+ $(AM_V_GEN)cpp -P $< > _mfc0.$@ && \
+ m4 _mfc0.$@ > $@ && \
+ rm _mfc0.$@
+.gen75.asm.g75b:
+ $(AM_V_GEN)$(GEN4ASM) -g 7.5 -o $@ $<
endif
-CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM)
+CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM) $(INTEL_GEN75_ASM)
EXTRA_DIST = \
$(INTEL_G6A) \
$(INTEL_G6B) \
$(INTEL_G7A) \
$(INTEL_G7B) \
+ $(INTEL_G75A) \
+ $(INTEL_G75B) \
$(INTEL_GEN6_INC) \
$(INTEL_GEN7_INC) \
+ $(INTEL_GEN75_INC) \
$(MFC_CORE) \
$(MFC_CORE_AVC) \
+ $(MFC_CORE_HSW) \
$(NULL)
# Extra clean files so that maintainer-clean removes *everything*
--- /dev/null
+/*
+ * Copyright © 2010-2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhao Yakui <yakui.zhao@intel.com>
+ */
+
+START:
+ mov (16) pak_object_reg0.0<1>:ud 0x0:ud {align1};
+ mov (8) obw_m0.0<1>:ud 0x0:ud {align1};
+ mov (8) mb_cur_msg.0<1>:ud 0x0:ud {align1};
+ mov (16) mb_temp.0<1>:ud 0x0:ud {align1};
+ mov (1) cur_mb_x<1>:uw mb_x<0,1,0>:ub {align1};
+ mov (1) cur_mb_y<1>:uw mb_y<0,1,0>:ub {align1};
+ mov (1) end_mb_x<1>:uw slice_end_x<0,1,0>:ub {align1};
+ mov (1) end_mb_y<1>:uw slice_end_y<0,1,0>:ub {align1};
+ mov (1) end_loop_count<1>:uw total_mbs<0,1,0>:uw {align1};
+ mov (1) vme_len<1>:ud 2:ud {align1};
+ and.z.f0.0 (1) null:uw mb_flag<0,1,0>:ub INTRA_SLICE:uw {align1};
+ (f0.0) mov (1) vme_len<1>:ud 24:ud {align1};
+
+ mov (1) obw_m0.8<1>:UD buffer_offset<0,1,0>:ud {align1};
+ mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+ mul (1) mb_cur_msg.8<1>:UD width_in_mbs<0,1,0>:UW cur_mb_y<0,1,0>:UW {align1};
+ add (1) mb_cur_msg.8<1>:UD mb_cur_msg.8<0,1,0>:UD cur_mb_x<0,1,0>:uw {align1};
+ mul (1) mb_cur_msg.8<1>:UD mb_cur_msg.8<0,1,0>:UD vme_len<0,1,0>:UD {align1};
+ mov (1) mb_cur_msg.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+ mov (1) pak_object0_ud<1>:ud MFC_AVC_PAK_OBJECT_DW0:ud {align1};
+ mov (1) pak_object5_ud<1>:ud MFC_AVC_PAK_OBJECT_DW5:ud {align1};
+ mov (1) pak_object10_ud<1>:ud MFC_AVC_PAK_OBJECT_DW10:ud {align1};
+ mov (1) pak_object6_ud<1>:ub qp_flag<0,1,0>:ub {align1};
+
+pak_object_loop:
+ mov (8) mb_msg0.0<1>:ud mb_cur_msg.0<8,8,1>:ud {align1};
+ mov (1) pak_object4_ud<1>:ud MFC_AVC_PAK_OBJECT_DW4:ud {align1};
+ mov (1) tmp_reg0.0<1>:ub cur_mb_x<0,1,0>:ub {align1};
+ mov (1) tmp_reg0.1<1>:ub cur_mb_y<0,1,0>:ub {align1};
+ mov (1) pak_object4_ud<1>:uw tmp_reg0.0<0,1,0>:uw {align1};
+ /* pak_object6_ud */
+ mov (1) pak_object_reg0.26<1>:uw 0x0:uw {align1};
+
+ cmp.e.f0.0 (1) null:uw cur_mb_x<0,1,0>:uw end_mb_x<0,1,0>:uw {align1};
+ (-f0.0) jmpi (1) start_mb_flag;
+ cmp.e.f0.0 (1) null:uw cur_mb_y<0,1,0>:uw end_mb_y<0,1,0>:uw {align1};
+ (f0.0) mov (1) pak_object_reg0.26<1>:uw MFC_AVC_PAK_LAST_MB:uw {align1};
+start_mb_flag:
+ and.z.f0.0 (1) null:uw mb_flag<0,1,0>:ub INTRA_SLICE:uw {align1};
+ (f0.0) jmpi (1) inter_frame_start;
+
+/* bind index 0, read 2 oword (32bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ null
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_2,
+ MV_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+ jmpi (1) intra_pak_command;
+
+nop;
+nop;
+inter_frame_start:
+/* bind index 0, read 4 oword (64bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_wb.0<1>:ud
+ null
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_4,
+ MV_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 2
+ {align1};
+
+/* TODO: RefID is required after multi-references are added */
+cmp.l.f0.0 (1) null:w mb_intra_wb.16<0,1,0>:uw mb_inter_wb.8<0,1,0>:uw {align1};
+(f0.0) jmpi (1) intra_pak_command;
+
+/* MV len and MV mode */
+ and (1) pak_object3_ud<1>:ud mb_inter_wb.0<0,1,0>:ud MFC_AVC_INTER_MASK_DW3:ud {align1};
+ add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud MFC_AVC_PAK_CBP:ud {align1};
+ and (1) tmp_reg0.0<1>:uw mb_inter_wb.0<0,1,0>:uw INTER_MASK:uw {align1};
+ mov (1) pak_object1_ud<1>:ud 32:ud {align1};
+ cmp.e.f0.0 (1) null:uw tmp_reg0.0<0,1,0>:uw INTER_8X8MODE:uw {align1};
+ (-f0.0) add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud INTER_MV8:ud {align1};
+ (-f0.0) jmpi (1) inter_mv_check;
+ and.nz.f0.0 (1) null:ud mb_inter_wb.4<0,1,0>:uw SUBSHAPE_MASK:uw {align1};
+ (f0.0) mov (1) pak_object1_ud<1>:ud 128:ud {align1};
+ (f0.0) add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud INTER_MV32:ud {align1};
+ (f0.0) jmpi (1) mv_check_end;
+
+ add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud INTER_MV8:ud {align1};
+
+inter_mv_check:
+ and (1) tmp_reg0.0<1>:uw mb_inter_wb.0<0,1,0>:uw INTER_MASK:uw {align1};
+ cmp.e.f0.0 (1) null:uw tmp_reg0.0<0,1,0>:uw INTER_16X16MODE:uw {align1};
+ (f0.0) jmpi (1) mv_check_end;
+
+add (1) mb_msg0.8<1>:UD mb_msg0.8<0,1,0>:ud 3:ud {align1};
+/* Read MV for MB A */
+/* bind index 0, read 8 oword (128bytes), msg type: 0(OWord Block Read) */
+send (16)
+ mb_ind
+ mb_mv0.0<1>:ud
+ null
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_TYPE,
+ OBR_CONTROL_8,
+ MV_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 4
+ {align1};
+/* TODO: RefID is required after multi-references are added */
+
+ mov (2) mb_mv0.8<1>:ud mb_mv1.0<2,2,1>:ud {align1};
+ mov (2) mb_mv0.16<1>:ud mb_mv2.0<2,2,1>:ud {align1};
+ mov (2) mb_mv0.24<1>:ud mb_mv3.0<2,2,1>:ud {align1};
+
+ mov (8) msg_reg0.0<1>:ud mb_msg0.0<8,8,1>:ud {align1} ;
+ mov (8) msg_reg1.0<1>:ud mb_mv0.0<8,8,1>:ud {align1} ;
+/* Write MV for MB A */
+/* bind index 0, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ MV_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+mv_check_end:
+
+/* ref list */
+ mov (1) pak_object8_ud<1>:ud fwd_ref<0,1,0>:ud {align1};
+ mov (1) pak_object9_ud<1>:ud bwd_ref<0,1,0>:ud {align1};
+/* inter_mode. pak_object7_ud */
+ mov (1) pak_object7_ud<1>:ud 0x0:ud {align1};
+ mov (1) pak_object_reg0.28<1>:ub mb_inter_wb.5<0,1,0>:ub {align1};
+ mov (1) pak_object_reg0.29<1>:ub mb_inter_wb.6<0,1,0>:ub {align1};
+
+/* mv start address */
+ add (1) tmp_reg0.4<1>:ud mb_cur_msg.8<0,1,0>:ud 3:ud {align1};
+ mul (1) pak_object2_ud<1>:ud tmp_reg0.4<0,1,0>:ud 16:ud {align1};
+
+ jmpi (1) write_pak_command;
+
+intra_pak_command:
+ /* object 1/2 is set to zero */
+ mov (2) pak_object1_ud<1>:ud 0x0:ud {align1};
+ /* object 7/8 intra mode */
+ mov (1) pak_object7_ud<1>:ud mb_intra_wb.4<0,1,0>:ud {align1};
+ mov (1) pak_object8_ud<1>:ud mb_intra_wb.8<0,1,0>:ud {align1};
+ /* object 9 Intra structure */
+ mov (1) pak_object9_ud<1>:ud 0x0:ud {align1};
+ mov (1) pak_object9_ud<1>:ub mb_intra_wb.12<0,1,0>:ub {align1};
+
+ and (1) pak_object3_ud<1>:ud mb_intra_wb.0<0,1,0>:ud MFC_AVC_INTRA_MASK_DW3:ud {align1};
+ add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud MFC_AVC_INTRA_FLAG + MFC_AVC_PAK_CBP:ud {align1};
+
+ mov (1) tmp_reg0.0<1>:ud 0:ud {align1};
+ mov (1) tmp_reg0.1<1>:ub mb_intra_wb.2<0,1,0>:ub {align1};
+ and (1) tmp_reg0.0<1>:uw tmp_reg0.0<0,1,0>:uw AVC_INTRA_MASK:uw {align1};
+ add (1) pak_object3_ud<1>:ud pak_object3_ud<0,1,0>:ud tmp_reg0.0<0,1,0>:ud {align1};
+
+/* Write the pak command into the batchbuffer */
+write_pak_command:
+ mov (8) msg_reg0.0<1>:ud obw_m0.0<8,8,1>:ud {align1} ;
+ mov (8) msg_reg1.0<1>:ud pak_object_reg0.0<8,8,1>:ud {align1} ;
+
+/* bind index 2 (MFC_BIND_IDX), write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ MFC_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+ add (1) msg_reg0.8<1>:ud msg_reg0.8<0,1,0>:ud 2:ud {align1};
+ mov (8) msg_reg1.0<1>:ud pak_object_reg1.0<8,8,1>:ud {align1};
+
+/* bind index 2 (MFC_BIND_IDX), write 1 oword (16bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_0,
+ MFC_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+
+/* Check the next mb */
+add (1) cur_loop_count<1>:uw cur_loop_count<0,1,0>:uw 1:uw {align1};
+cmp.e.f0.0 (1) null:uw cur_loop_count<0,1,0>:uw end_loop_count<0,1,0>:uw {align1};
+(f0.0) jmpi (1) pak_loop_end;
+/* the buffer offset for next block */
+add (1) obw_m0.8<1>:ud obw_m0.8<0,1,0>:ud 3:uw {align1};
+add (1) mb_cur_msg.8<1>:ud mb_cur_msg.8<0,1,0>:ud vme_len<0,1,0>:ud {align1};
+add (1) cur_mb_x<1>:uw cur_mb_x<0,1,0>:uw 1:uw {align1};
+/* Check whether it is already equal to width in mbs */
+cmp.e.f0.0 (1) null:uw cur_mb_x<0,1,0>:uw width_in_mbs<0,1,0>:uw {align1};
+(f0.0) add (1) cur_mb_y<1>:uw cur_mb_y<0,1,0>:uw 1:uw {align1};
+(f0.0) mov (1) cur_mb_x<1>:uw 0:uw {align1};
+
+/* continue the pak command for next mb */
+jmpi (1) pak_object_loop;
+nop;
+nop;
+pak_loop_end:
+/* Issue message fence so that the previous write message is committed */
+send (16)
+ msg_ind
+ mb_wb.0<1>:ud
+ null
+ data_port(
+ OBR_CACHE_TYPE,
+ OBR_MESSAGE_FENCE,
+ OBR_MF_COMMIT,
+ MFC_BIND_IDX,
+ OBR_WRITE_COMMIT_CATEGORY,
+ OBR_HEADER_PRESENT
+ )
+ mlen 1
+ rlen 1
+ {align1};
+
+__EXIT:
+/*
+ * kill thread
+ */
+mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1};
+send (1) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
+
+nop;
+
--- /dev/null
+/*
+ * Copyright © 2010-2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhao Yakui <yakui.zhao@intel.com>
+ */
+
+#include "mfc_batchbuffer_hsw.inc"
+#include "mfc_batchbuffer_hsw.asm"
+
--- /dev/null
+ { 0x00800001, 0x23400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x2b000061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x2ac00061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x2ac00229, 0x000000a8, 0x00000000 },
+ { 0x00000001, 0x2ac20229, 0x000000a9, 0x00000000 },
+ { 0x00000001, 0x2ae00229, 0x000000b0, 0x00000000 },
+ { 0x00000001, 0x2ae20229, 0x000000b1, 0x00000000 },
+ { 0x00000001, 0x2ae40129, 0x000000ac, 0x00000000 },
+ { 0x00000001, 0x2ae80061, 0x00000000, 0x00000002 },
+ { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 },
+ { 0x00010001, 0x2ae80061, 0x00000000, 0x00000018 },
+ { 0x00000001, 0x21e80021, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x21f40231, 0x00000014, 0x00000000 },
+ { 0x00000041, 0x2b082521, 0x000000aa, 0x00000ac2 },
+ { 0x00000040, 0x2b082421, 0x00000b08, 0x00000ac0 },
+ { 0x00000041, 0x2b080421, 0x00000b08, 0x00000ae8 },
+ { 0x00000001, 0x2b140231, 0x00000014, 0x00000000 },
+ { 0x00000001, 0x23400061, 0x00000000, 0x7149000a },
+ { 0x00000001, 0x23540061, 0x00000000, 0x000f000f },
+ { 0x00000001, 0x23680061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23580231, 0x000000a6, 0x00000000 },
+ { 0x00600001, 0x2b400021, 0x008d0b00, 0x00000000 },
+ { 0x00000001, 0x23500061, 0x00000000, 0xffff0000 },
+ { 0x00000001, 0x21000231, 0x00000ac0, 0x00000000 },
+ { 0x00000001, 0x21010231, 0x00000ac2, 0x00000000 },
+ { 0x00000001, 0x23500129, 0x00000100, 0x00000000 },
+ { 0x00000001, 0x235a0169, 0x00000000, 0x00000000 },
+ { 0x01000010, 0x20002528, 0x00000ac0, 0x00000ae0 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000020 },
+ { 0x01000010, 0x20002528, 0x00000ac2, 0x00000ae2 },
+ { 0x00010001, 0x235a0169, 0x00000000, 0x04000400 },
+ { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000040 },
+ { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x02180200 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000240 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0a800031, 0x2b601ca1, 0x00000b40, 0x02280300 },
+ { 0x05000010, 0x2000252c, 0x00000b70, 0x00000b88 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x000001f0 },
+ { 0x00000005, 0x234c0c21, 0x00000b80, 0x1f00ffff },
+ { 0x00000040, 0x234c0c21, 0x0000034c, 0x000e0000 },
+ { 0x00000005, 0x21002d29, 0x00000b80, 0x00030003 },
+ { 0x00000001, 0x23440061, 0x00000000, 0x00000020 },
+ { 0x01000010, 0x20002d28, 0x00000100, 0x00030003 },
+ { 0x00110040, 0x234c0c21, 0x0000034c, 0x00400000 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000050 },
+ { 0x02000005, 0x20002d20, 0x00000b84, 0xff00ff00 },
+ { 0x00010001, 0x23440061, 0x00000000, 0x00000080 },
+ { 0x00010040, 0x234c0c21, 0x0000034c, 0x00600000 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x000000c0 },
+ { 0x00000040, 0x234c0c21, 0x0000034c, 0x00400000 },
+ { 0x00000005, 0x21002d29, 0x00000b80, 0x00030003 },
+ { 0x01000010, 0x20002d28, 0x00000100, 0x00000000 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000080 },
+ { 0x00000040, 0x2b480c21, 0x00000b48, 0x00000003 },
+ { 0x0a800031, 0x2ba01ca1, 0x00000b40, 0x02480400 },
+ { 0x00200001, 0x2ba80021, 0x00450bc0, 0x00000000 },
+ { 0x00200001, 0x2bb00021, 0x00450be0, 0x00000000 },
+ { 0x00200001, 0x2bb80021, 0x00450c00, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0b40, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d0ba0, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0200 },
+ { 0x00000001, 0x23600021, 0x000000b4, 0x00000000 },
+ { 0x00000001, 0x23640021, 0x000000b8, 0x00000000 },
+ { 0x00000001, 0x235c0061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x235c0231, 0x00000b85, 0x00000000 },
+ { 0x00000001, 0x235d0231, 0x00000b86, 0x00000000 },
+ { 0x00000040, 0x21040c21, 0x00000b08, 0x00000003 },
+ { 0x00000041, 0x23480c21, 0x00000104, 0x00000010 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x000000b0 },
+ { 0x00200001, 0x23440061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x235c0021, 0x00000b64, 0x00000000 },
+ { 0x00000001, 0x23600021, 0x00000b68, 0x00000000 },
+ { 0x00000001, 0x23640061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23640231, 0x00000b6c, 0x00000000 },
+ { 0x00000005, 0x234c0c21, 0x00000b60, 0x0000c0ff },
+ { 0x00000040, 0x234c0c21, 0x0000034c, 0x000e2000 },
+ { 0x00000001, 0x21000061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x21010231, 0x00000b62, 0x00000000 },
+ { 0x00000005, 0x21002d29, 0x00000100, 0x1f001f00 },
+ { 0x00000040, 0x234c0421, 0x0000034c, 0x00000100 },
+ { 0x00600001, 0x28000021, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d0340, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0202 },
+ { 0x00000040, 0x28080c21, 0x00000808, 0x00000002 },
+ { 0x00600001, 0x28200021, 0x008d0360, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0002 },
+ { 0x00000040, 0x2ac42d29, 0x00000ac4, 0x00010001 },
+ { 0x01000010, 0x20002528, 0x00000ac4, 0x00000ae4 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000090 },
+ { 0x00000040, 0x21e82c21, 0x000001e8, 0x00030003 },
+ { 0x00000040, 0x2b080421, 0x00000b08, 0x00000ae8 },
+ { 0x00000040, 0x2ac02d29, 0x00000ac0, 0x00010001 },
+ { 0x01000010, 0x20002528, 0x00000ac0, 0x000000aa },
+ { 0x00010040, 0x2ac22d29, 0x00000ac2, 0x00010001 },
+ { 0x00010001, 0x2ac00169, 0x00000000, 0x00000000 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0xfffffb30 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0a800031, 0x2b601ca1, 0x00000800, 0x0219e002 },
+ { 0x00600001, 0x2e000021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x24001ca8, 0x00000e00, 0x82000010 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
--- /dev/null
+/*
+ * Copyright © 2010-2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhao Yakui <yakui.zhao@intel.com>
+ */
+
+/* GRF registers
+ * r0 header
+ * r1~r4 constant buffer (reserved)
+ * r5 inline data
+ * r6~r7 reserved
+ * r8~r15 temporary registers
+ * r16 write back of Oword Block Write
+ */
+
+/*
+ * GRF 0 -- header
+ */
+define(`thread_id_ub', `r0.20<0,1,0>:UB') /* thread id in payload */
+
+define(`inline_reg0', `r5')
+define(`buffer_offset', `inline_reg0.0') /* :ud, in units of Owords */
+/* :ub,
+ * bit0 indicates the slice type: 1 is an I slice, 0 is a P/B slice
+ */
+define(`mb_flag', `inline_reg0.4')
+define(`qp_flag', `inline_reg0.6') /* :ub */
+
+define(`mb_x', `inline_reg0.8') /* :ub, */
+define(`mb_y', `inline_reg0.9') /* :ub, */
+define(`mb_xy', `inline_reg0.8') /* :uw, */
+/* :uw, the picture width in macroblocks */
+define(`width_in_mbs', `inline_reg0.10')
+/* :w, the number of macroblock commands being processed by the kernel */
+define(`total_mbs', `inline_reg0.12')
+/* ub, the mb x/y of the last mb in slice */
+define(`slice_end_x', `inline_reg0.16')
+define(`slice_end_y', `inline_reg0.17')
+
+/* :ud the forward reference picture list */
+define(`fwd_ref', `inline_reg0.20')
+/* :ud the backward reference picture list */
+define(`bwd_ref', `inline_reg0.24')
+
+/*
+ * GRF 8~15 -- temporary registers
+ */
+define(`tmp_reg0', `r8')
+define(`tmp_reg1', `r9')
+define(`tmp_reg2', `r10')
+define(`tmp_reg3', `r11')
+define(`tmp_reg4', `r12')
+define(`tmp_reg5', `r13')
+define(`tmp_reg6', `r14')
+define(`tmp_reg7', `r15')
+
+/* Shares r15 with the last temporary above; presumably the message
+ * header (m0) of the Oword Block Write -- confirm against the kernel code
+ */
+define(`obw_m0', `tmp_reg7')
+
+/* Oword Block Write write-back: null destination with a length of 0 */
+define(`obw_wb', `null<1>:W')
+define(`obw_wb_length', `0')
+
+/*
+ * GRF 26~27
+ *
+ * Twelve :ud slots named pak_object0..11: eight in r26, four in r27.
+ * NOTE(review): presumably where the PAK object command dwords are
+ * assembled before being written out -- confirm against the kernel code.
+ */
+define(`pak_object_reg0', `r26')
+define(`pak_object0_ud', `r26.0')
+define(`pak_object1_ud', `r26.4')
+define(`pak_object2_ud', `r26.8')
+define(`pak_object3_ud', `r26.12')
+define(`pak_object4_ud', `r26.16')
+define(`pak_object5_ud', `r26.20')
+define(`pak_object6_ud', `r26.24')
+define(`pak_object7_ud', `r26.28')
+
+define(`pak_object_reg1', `r27')
+define(`pak_object8_ud', `r27.0')
+define(`pak_object9_ud', `r27.4')
+define(`pak_object10_ud', `r27.8')
+define(`pak_object11_ud', `r27.12')
+
+/*
+ * Message Payload registers
+ *
+ * msg_ind holds the register number (64) of the first payload register;
+ * the nine payload registers are consecutive, g64 through g72.
+ */
+define(`msg_ind', `64')
+define(`msg_reg0', `g64')
+define(`msg_reg1', `g65')
+define(`msg_reg2', `g66')
+define(`msg_reg3', `g67')
+define(`msg_reg4', `g68')
+define(`msg_reg5', `g69')
+define(`msg_reg6', `g70')
+define(`msg_reg7', `g71')
+define(`msg_reg8', `g72')
+
+/* NOTE(review): presumably binding table indices of the surfaces used by
+ * the kernel; they must match what the driver binds -- confirm
+ */
+define(`MV_BIND_IDX', `0')
+define(`MFC_BIND_IDX', `2')
+
+/* ts_msg_ind holds the register number (112) of ts_msg_reg0 */
+define(`ts_msg_ind', `112')
+define(`ts_msg_reg0', `r112')
+
+/* Constant values for dwords 0, 4, 5 and 10 of the PAK object command */
+define(`MFC_AVC_PAK_OBJECT_DW0', `0x7149000a')
+define(`MFC_AVC_PAK_OBJECT_DW4', `0xFFFF0000') /* CBP for Y */
+define(`MFC_AVC_PAK_OBJECT_DW5', `0x000F000F')
+/* the literal below has seven hex digits; its value is still zero */
+define(`MFC_AVC_PAK_OBJECT_DW10', `0x0000000')
+
+/* OBR_* constants.
+ * NOTE(review): presumably the Oword Block Read counterpart of the
+ * OBW_* (Oword Block Write) set further down -- confirm
+ */
+define(`OBR_MESSAGE_TYPE', `0')
+define(`OBR_CACHE_TYPE', `10')
+
+define(`OBR_MESSAGE_FENCE', `7')
+define(`OBR_MF_NOCOMMIT', `0')
+define(`OBR_MF_COMMIT', `0x20')
+
+/* block size selectors */
+define(`OBR_CONTROL_0', `0') /* 1 OWord, low 128 bits */
+define(`OBR_CONTROL_1', `1') /* 1 OWord, high 128 bits */
+define(`OBR_CONTROL_2', `2') /* 2 OWords */
+define(`OBR_CONTROL_4', `3') /* 4 OWords */
+define(`OBR_CONTROL_8', `4') /* 8 OWords */
+
+define(`OBR_HEADER_PRESENT', `1')
+define(`OBR_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */
+
+/* OBW_* constants for the Oword Block Write message */
+define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */
+
+define(`OBW_CACHE_TYPE', `10')
+
+
+define(`OBW_MESSAGE_TYPE', `8')
+
+/* block size selectors, same encoding as the OBR_CONTROL values above */
+define(`OBW_CONTROL_0', `0') /* 1 OWord, low 128 bits */
+define(`OBW_CONTROL_1', `1') /* 1 OWord, high 128 bits */
+define(`OBW_CONTROL_2', `2') /* 2 OWords */
+define(`OBW_CONTROL_4', `3') /* 4 OWords */
+define(`OBW_CONTROL_8', `4') /* 8 OWords */
+define(`OBW_HEADER_PRESENT', `1')
+
+/* Inter partition modes: the four mode values 0..3 fit in the two bits
+ * selected by the 0x03 mask.
+ * NOTE(review): which dword these masks are applied to is not visible
+ * here -- confirm against the kernel code
+ */
+define(`INTER_MASK', `0x03')
+define(`INTER_16X16MODE', `0x0')
+define(`INTER_16X8MODE', `0x01')
+define(`INTER_8X16MODE', `0x02')
+define(`INTER_8X8MODE', `0x03')
+define(`SUBSHAPE_MASK', `0xFF00')
+
+/* mb_ind holds the register number (90) of the first register in this
+ * group; the group runs from r90 through r96
+ */
+define(`mb_ind', `90')
+define(`mb_msg0', `r90')
+/* the next two names refer to the same register (r91) */
+define(`mb_wb', `r91')
+define(`mb_intra_wb', `r91')
+define(`mb_inter_wb', `r92')
+define(`mb_mv0', `r93')
+define(`mb_mv1', `r94')
+define(`mb_mv2', `r95')
+define(`mb_mv3', `r96')
+
+/* r86: current position and loop counter */
+define(`mb_temp', `r86')
+define(`cur_mb_x', `mb_temp.0') /* :uw */
+define(`cur_mb_y', `mb_temp.2') /* :uw */
+define(`cur_loop_count', `mb_temp.4') /* :uw */
+/* r87: end position and end loop counter, laid out like r86 above */
+define(`mb_end', `r87')
+define(`end_mb_x', `mb_end.0') /* :uw */
+define(`end_mb_y', `mb_end.2') /* :uw */
+define(`end_loop_count', `mb_end.4') /* :uw */
+/* :ud, the length of the VME prediction result for each mb, in units of Owords */
+define(`vme_len', `mb_end.8')
+define(`mb_cur_msg', `r88')
+
+/* NOTE(review): 0x0001 is bit0, the same bit the inline-data mb flag uses
+ * for the frame type (1 is the I-frame); presumably tested against that
+ * flag -- confirm
+ */
+define(`INTRA_SLICE', `0x0001')
+define(`MFC_AVC_PAK_LAST_MB', `0x0400')
+
+/* Masks and flag bits used when building the PAK object dwords.
+ * NOTE(review): the two DW3 masks are named for dword 3; the target dword
+ * of the remaining values is not visible here -- confirm
+ */
+define(`MFC_AVC_INTER_MASK_DW3', `0x1F00FFFF')
+define(`MFC_AVC_INTRA_MASK_DW3', `0x0000C0FF')
+define(`INTER_MV8', `0x00400000')
+define(`INTER_MV32', `0x00600000')
+define(`MFC_AVC_PAK_CBP', `0x000E0000')
+define(`MFC_AVC_INTRA_FLAG', `0x00002000')
+define(`AVC_INTRA_MASK', `0x1F00')