-VME_CORE = intra_frame.asm inter_frame.asm
+# batchbuffer.asm joins the core sources. Only the GEN7 lists below gain
+# batchbuffer entries; the GEN6 lists are left as they were.
+VME_CORE = batchbuffer.asm intra_frame.asm inter_frame.asm
INTEL_G6B = intra_frame.g6b inter_frame.g6b
INTEL_G6A = intra_frame.g6a inter_frame.g6a
INTEL_GEN6_INC = gen6_vme_header.inc
INTEL_GEN6_ASM = $(INTEL_G6A:%.g6a=%.gen6.asm)
-INTEL_G7B = intra_frame.g7b inter_frame.g7b
-INTEL_G7A = intra_frame.g7a inter_frame.g7a
-INTEL_GEN7_INC = gen7_vme_header.inc
+INTEL_G7B = batchbuffer.g7b intra_frame.g7b inter_frame.g7b
+INTEL_G7A = batchbuffer.g7a intra_frame.g7a inter_frame.g7a
+INTEL_GEN7_INC = gen7_batchbuffer_header.inc gen7_vme_header.inc
INTEL_GEN7_ASM = $(INTEL_G7A:%.g7a=%.gen7.asm)
TARGETS =
--- /dev/null
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Xiang Haihao <haihao.xiang@intel.com>
+ */
+
+/*
+ * __INTER_START
+ *
+ * Set-up: split the command count into groups of four plus a remainder,
+ * build the MEDIA_OBJECT command template and prepare the header used by
+ * the OWord Block Write messages below.
+ */
+__INTER_START:
+ /* remainder = count & 3; f0.1 is set when the remainder is zero */
+ and.z.f0.1 (1) remainder_cmds<1>:uw total_mbs<0,1,0>:uw 0x0003:uw {align1};
+ /* count &= 0xfffc (whole groups of four); f0.0 is set when the result is zero */
+ and.z.f0.0 (1) total_mbs<1>:uw total_mbs<0,1,0>:uw 0xfffc:uw {align1};
+
+ /* clear 16 dwords starting at the first temporary register */
+ mov (16) tmp_reg0<1>:ud 0x0:ud {align1} ;
+
+ /*
+  * Command template (8 dwords): dword 0 = command opcode, dword 1 = the
+  * INTRA/INTER type byte, upper word of dword 6 = picture width in
+  * macroblocks, dword 7 = transform 8x8 flag. The lower word of dword 6
+  * (x/y) is filled in per command by the loops.
+  */
+ mov (8) media_object_ud<1>:ud 0x0:ud {align1} ;
+ mov (1) media_object0_ud<1>:ud CMD_MEDIA_OBJECT {align1} ;
+ mov (1) media_object1_ud<1>:ud mtype_ub<0,1,0>ub {align1};
+ mov (1) media_object6_width<1>:uw width_in_mb<0,1,0>:uw {align1};
+ mov (1) media_object7_ud<1>:ud transform_8x8_ub<0,1,0>ub {align1};
+
+ /*
+  * Header dword 2 = (width * y + x) * 2: the write offset of the first
+  * command. Each command is 8 dwords, i.e. two OWords.
+  */
+ mul (1) tmp_reg0.8<1>:ud width_in_mb<0,1,0>:uw mb_y<0,1,0>:ub {align1};
+ add (1) tmp_reg0.8<1>:ud tmp_reg0.8<0,1,0>:ud mb_x<0,1,0>:ub {align1};
+ mul (1) tmp_reg0.8<1>:ud tmp_reg0.8<0,1,0>:ud 0x2:ud {align1} ;
+ mov (1) tmp_reg0.20<1>:ub thread_id_ub {align1}; /* dispatch id */
+
+ /* no whole group of four: go straight to the one-at-a-time path */
+ (f0.0)jmpi (1) __REMAINDER ;
+
+/*
+ * Main loop: each iteration writes four commands (8 OWords) with a single
+ * OWord Block Write. The four groups below are identical except for the
+ * destination payload register.
+ */
+__CMD_LOOP:
+ /* header for this write, then advance the stored offset by 8 OWords */
+ mov (8) msg_reg0.0<1>:ud tmp_reg0<8,8,1>:ud {align1};
+ add (1) tmp_reg0.8<1>:ud tmp_reg0.8<0,1,0>:ud 8:uw {align1} ;
+
+ /*
+  * Command 1 of 4: put the current x/y word into the template, copy the
+  * template into the payload, then step x; when x reaches the picture
+  * width, reset x to 0 and increment y.
+  */
+ mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ;
+ mov (8) msg_reg1<1>:ud media_object_ud<8,8,1>:ud {align1};
+ add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1};
+ cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1};
+ (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ;
+ (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ;
+
+ /* command 2 of 4 */
+ mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ;
+ mov (8) msg_reg2<1>:ud media_object_ud<8,8,1>:ud {align1};
+ add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1};
+ cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1};
+ (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ;
+ (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ;
+
+ /* command 3 of 4 */
+ mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ;
+ mov (8) msg_reg3<1>:ud media_object_ud<8,8,1>:ud {align1};
+ add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1};
+ cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1};
+ (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ;
+ (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ;
+
+ /* command 4 of 4 */
+ mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ;
+ mov (8) msg_reg4<1>:ud media_object_ud<8,8,1>:ud {align1};
+ add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1};
+ cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1};
+ (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ;
+ (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ;
+
+/*
+ * bind index 5, write 8 oword, msg type: 8(OWord Block Write)
+ * mlen 5 = one header register + four command registers
+ */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_4,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 5
+ rlen obw_wb_length
+ {align1};
+
+
+ /* four commands done: count -= 4, repeat until the count reaches zero */
+ add.z.f0.0 (1) total_mbs<1>:w total_mbs<0,1,0>:w -4:w {align1};
+ (-f0.0)jmpi (1) __CMD_LOOP ;
+
+/*
+ * Remainder path: write the commands left over after the groups of four,
+ * one command (2 OWords) per OWord Block Write.
+ */
+__REMAINDER:
+ /* f0.1 was set during set-up when the remainder count is zero */
+ (f0.1)jmpi (1) __DONE ;
+
+__REMAINDER_LOOP:
+ /* header for this write, then advance the stored offset by 2 OWords */
+ mov (8) msg_reg0.0<1>:ud tmp_reg0<8,8,1>:ud {align1} ;
+ add (1) tmp_reg0.8<1>:ud tmp_reg0.8<0,1,0>:ud 2:uw {align1} ;
+
+ /*
+  * Put the current x/y word into the template, copy the template into the
+  * payload, then step x; when x reaches the picture width, reset x to 0
+  * and increment y.
+  */
+ mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ;
+ mov (8) msg_reg1<1>:ud media_object_ud<8,8,1>:ud {align1};
+ add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1};
+ cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1};
+ (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ;
+ (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ;
+
+/*
+ * bind index 5, write 2 oword, msg type: 8(OWord Block Write)
+ * mlen 2 = one header register + one command register
+ */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+ /* one command done: remainder -= 1, repeat until it reaches zero */
+ add.z.f0.1 (1) remainder_cmds<1>:w remainder_cmds<0,1,0>:w -1:w {align1};
+ (-f0.1)jmpi (1) __REMAINDER_LOOP ;
+
+/*
+ * All commands written. When the last-object flag equals 1, append a
+ * batch buffer terminator at the current write offset; otherwise exit.
+ */
+__DONE:
+
+ cmp.e.f0.0 (1) null<1>:uw last_object<0,1,0>:uw 1:uw {align1};
+ (-f0.0)jmpi (1) __EXIT ;
+
+/* bind index 5, write 1 oword, msg type: 8(OWord Block Write) */
+ /* header carries the offset that follows the last written command */
+ mov (8) msg_reg0.0<1>:ud tmp_reg0<8,8,1>:ud {align1} ;
+ /* payload OWord: dword 0 = 0, dword 1 = batch buffer end, dwords 2-3 = 0 */
+ mov (4) msg_reg1.0<1>:ud 0x0:ud {align1} ;
+ mov (1) msg_reg1.4<1>:ud MI_BATCH_BUFFER_END {align1} ;
+
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_0,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/*
+ * Terminate the thread: copy r0 into the message register and send a
+ * one-register message to the thread spawner with the EOT option set.
+ */
+__EXIT:
+ mov (8) msg_reg0<1>:ud r0<8,8,1>:ud {align1} ;
+ send (16) msg_ind acc0<1>ud null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT} ;
--- /dev/null
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Xiang Haihao <haihao.xiang@intel.com>
+ */
+
+include(`gen7_batchbuffer_header.inc')
+include(`batchbuffer.asm')
+
--- /dev/null
+ { 0x01000005, 0x22202d29, 0x020000a6, 0x00030003 },
+ { 0x01000005, 0x20a62d29, 0x000000a6, 0xfffcfffc },
+ { 0x00800001, 0x21000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x22000061, 0x00000000, 0x71000006 },
+ { 0x00000001, 0x22040221, 0x000000a3, 0x00000000 },
+ { 0x00000001, 0x221a0129, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x221c0221, 0x000000a2, 0x00000000 },
+ { 0x00000041, 0x21084521, 0x000000a0, 0x000000a5 },
+ { 0x00000040, 0x21084421, 0x00000108, 0x000000a4 },
+ { 0x00000041, 0x21080c21, 0x00000108, 0x00000002 },
+ { 0x00000001, 0x21140231, 0x00000014, 0x00000000 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x0000003a },
+ { 0x00600001, 0x28000021, 0x008d0100, 0x00000000 },
+ { 0x00000040, 0x21082c21, 0x00000108, 0x00080008 },
+ { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d0200, 0x00000000 },
+ { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
+ { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
+ { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
+ { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
+ { 0x00600001, 0x28400021, 0x008d0200, 0x00000000 },
+ { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
+ { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
+ { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
+ { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
+ { 0x00600001, 0x28600021, 0x008d0200, 0x00000000 },
+ { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
+ { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
+ { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
+ { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
+ { 0x00600001, 0x28800021, 0x008d0200, 0x00000000 },
+ { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
+ { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
+ { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x0a0a0405 },
+ { 0x01000040, 0x20a63dad, 0x000000a6, 0xfffcfffc },
+ { 0x00110020, 0x34001c00, 0x00001400, 0xffffffc6 },
+ { 0x00010020, 0x34001c00, 0x02001400, 0x00000016 },
+ { 0x00600001, 0x28000021, 0x008d0100, 0x00000000 },
+ { 0x00000040, 0x21082c21, 0x00000108, 0x00020002 },
+ { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d0200, 0x00000000 },
+ { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 },
+ { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 },
+ { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0205 },
+ { 0x01000040, 0x22203dad, 0x02000220, 0xffffffff },
+ { 0x00110020, 0x34001c00, 0x02001400, 0xffffffea },
+ { 0x01000010, 0x20002d28, 0x000000a8, 0x00010001 },
+ { 0x00110020, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00600001, 0x28000021, 0x008d0100, 0x00000000 },
+ { 0x00400001, 0x28200061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x28240061, 0x00000000, 0x05000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0005 },
+ { 0x00600001, 0x28000021, 0x008d0000, 0x00000000 },
+ { 0x07800031, 0x24001ca0, 0x00000800, 0x82000010 },
--- /dev/null
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Xiang Haihao <haihao.xiang@intel.com>
+ */
+
+/*
+ * Surface binding indices. Only the batch buffer index is consumed in this
+ * header, through OBW_BIND_IDX.
+ */
+define(`BIND_IDX_OUTPUT', `0')
+define(`BIND_IDX_VME', `1')
+define(`BIND_IDX_VME_REF0', `2')
+define(`BIND_IDX_VME_REF1', `3')
+define(`BIND_IDX_INEP', `4')
+define(`BIND_IDX_VME_BATCHBUFFER', `5')
+
+/*
+ * Arguments of the data_port descriptor used for the OWord Block Write
+ * (OBW) messages.
+ */
+/* NOTE(review): meaning of the value 10 is not visible here - confirm against the hardware documentation */
+define(`OBW_CACHE_TYPE', `10')
+
+/* message type 8: OWord Block Write */
+define(`OBW_MESSAGE_TYPE', `8')
+
+/* the writes target the batch buffer surface */
+define(`OBW_BIND_IDX', `BIND_IDX_VME_BATCHBUFFER')
+
+/* block size selector of the write */
+define(`OBW_CONTROL_0', `0') /* 1 OWord, low 128 bits */
+define(`OBW_CONTROL_1', `1') /* 1 OWord, high 128 bits */
+define(`OBW_CONTROL_2', `2') /* 2 OWords */
+define(`OBW_CONTROL_3', `3') /* 4 OWords */
+define(`OBW_CONTROL_4', `4') /* 8 OWords */
+
+define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */
+
+/* every OBW message carries a header register */
+define(`OBW_HEADER_PRESENT', `1')
+
+/* command dwords written into the batch buffer, as typed immediates */
+define(`CMD_MEDIA_OBJECT', `0x71000006:UD')
+define(`MI_BATCH_BUFFER_END', `0x05000000:UD')
+
+/* GRF registers
+ * r0 header
+ * r1~r4 constant buffer (reserved)
+ * r5 inline data
+ * r6~r7 reserved
+ * r8~r15 temporary registers
+ * r16 MEDIA_OBJECT command template
+ * r17 remainder command count
+ */
+/*
+ * GRF 0 -- header
+ */
+define(`thread_id_ub', `r0.20<0,1,0>:UB') /* thread id in payload */
+
+/*
+ * GRF 1~4 -- Constant Buffer (reserved)
+ */
+
+/*
+ * GRF 5 -- inline data
+ *
+ * Sub-register numbers are byte offsets. The x and y counters are adjacent
+ * bytes (offsets 4 and 5); the xy name starts at the same offset as x so
+ * that both can be read together as one word.
+ */
+define(`inline_reg0', `r5')
+define(`width_in_mb', `inline_reg0.0') /* the picture width in macroblocks */
+define(`transform_8x8_ub', `inline_reg0.2') /* transform_8x8 flag */
+define(`mtype_ub', `inline_reg0.3') /* 0: INTRA, 1: INTER */
+define(`mb_x', `inline_reg0.4') /* x of the next macroblock, one byte */
+define(`mb_y', `inline_reg0.5') /* y of the next macroblock, one byte */
+define(`mb_xy', `inline_reg0.4') /* x and y bytes addressed as one word */
+define(`total_mbs', `inline_reg0.6') /* the number of macroblock commands
+ * being processed by the kernel
+ */
+define(`last_object', `inline_reg0.8') /* the last object flag */
+/*
+ * GRF 8~15 -- temporary registers
+ *
+ * batchbuffer.asm keeps the OWord Block Write header in the first one:
+ * byte offset 8 holds the write offset, byte offset 20 the dispatch id.
+ */
+define(`tmp_reg0', `r8')
+define(`tmp_reg1', `r9')
+define(`tmp_reg2', `r10')
+define(`tmp_reg3', `r11')
+define(`tmp_reg4', `r12')
+define(`tmp_reg5', `r13')
+define(`tmp_reg6', `r14')
+define(`tmp_reg7', `r15')
+
+/*
+ * GRF 16 -- MEDIA_OBJECT command template, 8 dwords.
+ * Names ending in N_ud address dword N (byte offset 4 * N). Dword 6 is
+ * also split into two words: xy at byte 24 and width at byte 26.
+ */
+define(`media_object_ud', `r16.0')
+define(`media_object0_ud', `r16.0')
+define(`media_object1_ud', `r16.4')
+define(`media_object2_ud', `r16.8')
+define(`media_object3_ud', `r16.12')
+define(`media_object4_ud', `r16.16')
+define(`media_object5_ud', `r16.20')
+define(`media_object6_ud', `r16.24')
+define(`media_object6_xy', `r16.24')
+define(`media_object6_width', `r16.26')
+define(`media_object7_ud', `r16.28')
+
+/*
+ * GRF 17 -- number of commands left after the groups of four
+ */
+define(`remainder_cmds', `r17.0')
+
+/*
+ * Write back for the OWord Block Write message: none.
+ * Write commit is removed on Ivybridge, so the response destination is the
+ * null register and the response length is 0.
+ */
+define(`obw_wb', `null<1>:W')
+define(`obw_wb_length', `0')
+
+
+/*
+ * Message Payload registers
+ *
+ * The first define is the register number given to the send instructions;
+ * the following names map payload register N to g64 + N. batchbuffer.asm
+ * references payload registers 0 to 4 only.
+ */
+define(`msg_ind', `64')
+define(`msg_reg0', `g64')
+define(`msg_reg1', `g65')
+define(`msg_reg2', `g66')
+define(`msg_reg3', `g67')
+define(`msg_reg4', `g68')
+define(`msg_reg5', `g69')
+define(`msg_reg6', `g70')
+define(`msg_reg7', `g71')
+define(`msg_reg8', `g72')