From 4775a5a5fa2b60739985134621eb811dcbe0a5e9 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 14 Feb 2012 09:28:57 +0800 Subject: [PATCH] A new shader to create VME batchbuffer on Ivybridge Signed-off-by: Xiang, Haihao --- src/shaders/vme/Makefile.am | 8 +- src/shaders/vme/batchbuffer.asm | 164 ++++++++++++++++++++++++++++ src/shaders/vme/batchbuffer.g7a | 29 +++++ src/shaders/vme/batchbuffer.g7b | 62 +++++++++++ src/shaders/vme/gen7_batchbuffer_header.inc | 139 +++++++++++++++++++++++ 5 files changed, 398 insertions(+), 4 deletions(-) create mode 100644 src/shaders/vme/batchbuffer.asm create mode 100644 src/shaders/vme/batchbuffer.g7a create mode 100644 src/shaders/vme/batchbuffer.g7b create mode 100644 src/shaders/vme/gen7_batchbuffer_header.inc diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am index d58a0be..e9786f3 100644 --- a/src/shaders/vme/Makefile.am +++ b/src/shaders/vme/Makefile.am @@ -1,13 +1,13 @@ -VME_CORE = intra_frame.asm inter_frame.asm +VME_CORE = batchbuffer.asm intra_frame.asm inter_frame.asm INTEL_G6B = intra_frame.g6b inter_frame.g6b INTEL_G6A = intra_frame.g6a inter_frame.g6a INTEL_GEN6_INC = gen6_vme_header.inc INTEL_GEN6_ASM = $(INTEL_G6A:%.g6a=%.gen6.asm) -INTEL_G7B = intra_frame.g7b inter_frame.g7b -INTEL_G7A = intra_frame.g7a inter_frame.g7a -INTEL_GEN7_INC = gen7_vme_header.inc +INTEL_G7B = batchbuffer.g7b intra_frame.g7b inter_frame.g7b +INTEL_G7A = batchbuffer.g7a intra_frame.g7a inter_frame.g7a +INTEL_GEN7_INC = gen7_batchbuffer_header.inc gen7_vme_header.inc INTEL_GEN7_ASM = $(INTEL_G7A:%.g7a=%.gen7.asm) TARGETS = diff --git a/src/shaders/vme/batchbuffer.asm b/src/shaders/vme/batchbuffer.asm new file mode 100644 index 0000000..8e1ed51 --- /dev/null +++ b/src/shaders/vme/batchbuffer.asm @@ -0,0 +1,164 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), 
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Xiang Haihao
+ */
+
+/*
+ * __START: write one 8-dword MEDIA_OBJECT command per macroblock into the VME batchbuffer surface
+ */
+__INTER_START:
+        and.z.f0.1 (1) remainder_cmds<1>:uw total_mbs<0,1,0>:uw 0x0003:uw {align1}; /* leftover count = low 2 bits of the total, f0.1 set when zero */
+        and.z.f0.0 (1) total_mbs<1>:uw total_mbs<0,1,0>:uw 0xfffc:uw {align1}; /* total rounded down to a multiple of 4, f0.0 set when zero */
+
+        mov (16) tmp_reg0<1>:ud 0x0:ud {align1} ; /* 16 dwords: clears r8 and r9 */
+
+        mov (8) media_object_ud<1>:ud 0x0:ud {align1} ; /* zero the 8-dword command template */
+        mov (1) media_object0_ud<1>:ud CMD_MEDIA_OBJECT {align1} ; /* dword 0: command opcode */
+        mov (1) media_object1_ud<1>:ud mtype_ub<0,1,0>:ub {align1}; /* dword 1: mtype byte from the inline data */
+        mov (1) media_object6_width<1>:uw width_in_mb<0,1,0>:uw {align1}; /* dword 6, upper word: picture width in macroblocks */
+        mov (1) media_object7_ud<1>:ud transform_8x8_ub<0,1,0>:ub {align1}; /* dword 7: transform_8x8 flag */
+
+        mul (1) tmp_reg0.8<1>:ud width_in_mb<0,1,0>:uw mb_y<0,1,0>:ub {align1}; /* start offset = (width * y + x) * 2 ... */
+        add (1) tmp_reg0.8<1>:ud tmp_reg0.8<0,1,0>:ud mb_x<0,1,0>:ub {align1};
+        mul (1) tmp_reg0.8<1>:ud tmp_reg0.8<0,1,0>:ud 0x2:ud {align1} ; /* ... i.e. 2 OWords per command already emitted */
+        mov (1) tmp_reg0.20<1>:ub thread_id_ub {align1}; /* dispatch id */
+
+        (f0.0)jmpi (1) __REMAINDER ; /* fewer than 4 commands: skip the unrolled loop */
+
+__CMD_LOOP:
+        mov (8) msg_reg0.0<1>:ud tmp_reg0<8,8,1>:ud {align1}; /* message header carrying the current write offset */
+        add (1) tmp_reg0.8<1>:ud tmp_reg0.8<0,1,0>:ud 8:uw {align1} ; /* next iteration starts 8 OWords (4 commands) further on */
+
+        mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ; /* command 1 of 4: patch in the current x and y bytes */
+        mov (8) msg_reg1<1>:ud media_object_ud<8,8,1>:ud {align1}; /* stage the command in the payload */
+        add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1}; /* advance x */
+        cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1}; /* reached the end of the row? */
+        (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ; /* then wrap x to 0 */
+        (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ; /* and move to the next row */
+
+        mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ; /* command 2 of 4, same steps */
+        mov (8) msg_reg2<1>:ud media_object_ud<8,8,1>:ud {align1};
+        add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1};
+        cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1};
+        (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ;
+        (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ;
+
+        mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ; /* command 3 of 4 */
+        mov (8) msg_reg3<1>:ud media_object_ud<8,8,1>:ud {align1};
+        add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1};
+        cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1};
+        (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ;
+        (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ;
+
+        mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ; /* command 4 of 4 */
+        mov (8) msg_reg4<1>:ud media_object_ud<8,8,1>:ud {align1};
+        add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1};
+        cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1};
+        (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ;
+        (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ;
+
+/* bind index 5, write 8 oword, msg type: 8(OWord Block Write) */
+send (16)
+        msg_ind
+        obw_wb
+        null
+        data_port(
+                OBW_CACHE_TYPE,
+                OBW_MESSAGE_TYPE,
+                OBW_CONTROL_4,
+                OBW_BIND_IDX,
+                OBW_WRITE_COMMIT_CATEGORY,
+                OBW_HEADER_PRESENT
+        )
+        mlen 5
+        rlen obw_wb_length
+        {align1};
+
+
+        add.z.f0.0 (1) total_mbs<1>:w total_mbs<0,1,0>:w -4:w {align1}; /* 4 commands written, f0.0 set when none are left */
+        (-f0.0)jmpi (1) __CMD_LOOP ;
+
+__REMAINDER:
+        (f0.1)jmpi (1) __DONE ; /* f0.1 still holds the result of the first instruction: no leftover commands */
+
+__REMAINDER_LOOP:
+        mov (8) msg_reg0.0<1>:ud tmp_reg0<8,8,1>:ud {align1} ; /* message header carrying the current write offset */
+        add (1) tmp_reg0.8<1>:ud tmp_reg0.8<0,1,0>:ud 2:uw {align1} ; /* one command is 2 OWords */
+
+        mov (1) media_object6_xy<1>:uw mb_xy<1>:uw {align1} ; /* single command, same steps as in the unrolled loop */
+        mov (8) msg_reg1<1>:ud media_object_ud<8,8,1>:ud {align1};
+        add (1) mb_x<1>:ub mb_x<0,1,0>:ub 1:uw {align1};
+        cmp.e.f0.0 (1) null<1>:uw width_in_mb<0,1,0>:uw mb_x<0,1,0>:ub {align1};
+        (f0.0)mov (1) mb_x<1>:ub 0:uw {align1} ;
+        (f0.0)add (1) mb_y<1>:ub mb_y<0,1,0>:ub 1:uw {align1} ;
+
+/* bind index 5, write 2 oword, msg type: 8(OWord Block Write) */
+send (16)
+        msg_ind
+        obw_wb
+        null
+        data_port(
+                OBW_CACHE_TYPE,
+                OBW_MESSAGE_TYPE,
+                OBW_CONTROL_2,
+                OBW_BIND_IDX,
+                OBW_WRITE_COMMIT_CATEGORY,
+                OBW_HEADER_PRESENT
+        )
+        mlen 2
+        rlen obw_wb_length
+        {align1};
+
+        add.z.f0.1 (1) remainder_cmds<1>:w remainder_cmds<0,1,0>:w -1:w {align1}; /* one leftover command written, f0.1 set when none are left */
+        (-f0.1)jmpi (1) __REMAINDER_LOOP ;
+
+__DONE:
+
+        cmp.e.f0.0 (1) null<1>:uw last_object<0,1,0>:uw 1:uw {align1};
+        (-f0.0)jmpi (1) __EXIT ; /* only the object flagged as last terminates the batchbuffer */
+
+/* bind index 5, write 1 oword, msg type: 8(OWord Block Write) */
+        mov (8) msg_reg0.0<1>:ud tmp_reg0<8,8,1>:ud {align1} ; /* header: offset just past the last command */
+        mov (4) msg_reg1.0<1>:ud 0x0:ud {align1} ; /* one OWord of zeros ... */
+        mov (1) msg_reg1.4<1>:ud MI_BATCH_BUFFER_END {align1} ; /* ... with MI_BATCH_BUFFER_END at byte offset 4 */
+
+send (16)
+        msg_ind
+        obw_wb
+        null
+        data_port(
+                OBW_CACHE_TYPE,
+                OBW_MESSAGE_TYPE,
+                OBW_CONTROL_0,
+                OBW_BIND_IDX,
+                OBW_WRITE_COMMIT_CATEGORY,
+                OBW_HEADER_PRESENT
+        )
+        mlen 2
+        rlen obw_wb_length
+        {align1};
+
+__EXIT:
+        mov (8) msg_reg0<1>:ud r0<8,8,1>:ud {align1} ; /* hand r0 back as the final message payload */
+        send (16) msg_ind acc0<1>ud null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT} ; /* EOT message to the thread spawner ends the thread */
diff --git a/src/shaders/vme/batchbuffer.g7a b/src/shaders/vme/batchbuffer.g7a
new file mode 100644
index 0000000..a2c8864
--- /dev/null
+++ b/src/shaders/vme/batchbuffer.g7a
@@ -0,0 +1,29 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies 
of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Xiang Haihao + */ + +include(`gen7_batchbuffer_header.inc') +include(`batchbuffer.asm') + diff --git a/src/shaders/vme/batchbuffer.g7b b/src/shaders/vme/batchbuffer.g7b new file mode 100644 index 0000000..159d8fb --- /dev/null +++ b/src/shaders/vme/batchbuffer.g7b @@ -0,0 +1,62 @@ + { 0x01000005, 0x22202d29, 0x020000a6, 0x00030003 }, + { 0x01000005, 0x20a62d29, 0x000000a6, 0xfffcfffc }, + { 0x00800001, 0x21000061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x22000061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x22000061, 0x00000000, 0x71000006 }, + { 0x00000001, 0x22040221, 0x000000a3, 0x00000000 }, + { 0x00000001, 0x221a0129, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x221c0221, 0x000000a2, 0x00000000 }, + { 0x00000041, 0x21084521, 0x000000a0, 0x000000a5 }, + { 0x00000040, 0x21084421, 0x00000108, 0x000000a4 }, + { 0x00000041, 0x21080c21, 0x00000108, 0x00000002 }, + { 0x00000001, 0x21140231, 0x00000014, 0x00000000 }, + { 0x00010020, 0x34001c00, 0x00001400, 0x0000003a }, + { 0x00600001, 0x28000021, 0x008d0100, 0x00000000 }, + { 0x00000040, 0x21082c21, 0x00000108, 0x00080008 }, + { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0200, 
0x00000000 }, + { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 }, + { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 }, + { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 }, + { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 }, + { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 }, + { 0x00600001, 0x28400021, 0x008d0200, 0x00000000 }, + { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 }, + { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 }, + { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 }, + { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 }, + { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 }, + { 0x00600001, 0x28600021, 0x008d0200, 0x00000000 }, + { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 }, + { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 }, + { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 }, + { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 }, + { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 }, + { 0x00600001, 0x28800021, 0x008d0200, 0x00000000 }, + { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 }, + { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 }, + { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 }, + { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x0a0a0405 }, + { 0x01000040, 0x20a63dad, 0x000000a6, 0xfffcfffc }, + { 0x00110020, 0x34001c00, 0x00001400, 0xffffffc6 }, + { 0x00010020, 0x34001c00, 0x02001400, 0x00000016 }, + { 0x00600001, 0x28000021, 0x008d0100, 0x00000000 }, + { 0x00000040, 0x21082c21, 0x00000108, 0x00020002 }, + { 0x00000001, 0x22180129, 0x002000a4, 0x00000000 }, + { 0x00600001, 0x28200021, 0x008d0200, 0x00000000 }, + { 0x00000040, 0x20a42e31, 0x000000a4, 0x00010001 }, + { 0x01000010, 0x20004528, 0x000000a0, 0x000000a4 }, + { 0x00010001, 0x20a40171, 0x00000000, 0x00000000 }, + { 0x00010040, 0x20a52e31, 0x000000a5, 0x00010001 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0205 }, + { 0x01000040, 0x22203dad, 0x02000220, 0xffffffff }, + { 0x00110020, 0x34001c00, 0x02001400, 
0xffffffea }, + { 0x01000010, 0x20002d28, 0x000000a8, 0x00010001 }, + { 0x00110020, 0x34001c00, 0x00001400, 0x00000008 }, + { 0x00600001, 0x28000021, 0x008d0100, 0x00000000 }, + { 0x00400001, 0x28200061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x28240061, 0x00000000, 0x05000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0005 }, + { 0x00600001, 0x28000021, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x24001ca0, 0x00000800, 0x82000010 }, diff --git a/src/shaders/vme/gen7_batchbuffer_header.inc b/src/shaders/vme/gen7_batchbuffer_header.inc new file mode 100644 index 0000000..d13620f --- /dev/null +++ b/src/shaders/vme/gen7_batchbuffer_header.inc @@ -0,0 +1,139 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ *
+ * Authors:
+ *    Xiang Haihao
+ */
+
+define(`BIND_IDX_OUTPUT', `0')
+define(`BIND_IDX_VME', `1')
+define(`BIND_IDX_VME_REF0', `2')
+define(`BIND_IDX_VME_REF1', `3')
+define(`BIND_IDX_INEP', `4')
+define(`BIND_IDX_VME_BATCHBUFFER', `5') /* surface the kernel writes its commands to (via OBW_BIND_IDX) */
+
+define(`OBW_CACHE_TYPE', `10') /* NOTE(review): first data_port argument; confirm the meaning of 10 against the PRM */
+
+define(`OBW_MESSAGE_TYPE', `8') /* 8: OWord Block Write */
+
+define(`OBW_BIND_IDX', `BIND_IDX_VME_BATCHBUFFER')
+
+define(`OBW_CONTROL_0', `0') /* 1 OWord, low 128 bits */
+define(`OBW_CONTROL_1', `1') /* 1 OWord, high 128 bits */
+define(`OBW_CONTROL_2', `2') /* 2 OWords */
+define(`OBW_CONTROL_3', `3') /* 4 OWords */
+define(`OBW_CONTROL_4', `4') /* 8 OWords */
+
+define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */
+
+define(`OBW_HEADER_PRESENT', `1')
+
+define(`CMD_MEDIA_OBJECT', `0x71000006:UD') /* dword 0 of every command the kernel emits */
+define(`MI_BATCH_BUFFER_END', `0x05000000:UD') /* written after the last command when last_object is 1 */
+
+/* GRF registers
+ * r0           header
+ * r1~r4        constant buffer (reserved)
+ * r5           inline data
+ * r6~r7        reserved
+ * r8~r15       temporary registers
+ * r16          MEDIA_OBJECT command being built; r17 holds the leftover-command counter
+ */
+/*
+ * GRF 0 -- header
+ */
+define(`thread_id_ub', `r0.20<0,1,0>:UB') /* thread id in payload */
+
+/*
+ * GRF 1~4 -- Constant Buffer (reserved)
+ */
+
+/*
+ * GRF 5 -- inline data
+ */
+define(`inline_reg0', `r5')
+define(`width_in_mb', `inline_reg0.0') /* the picture width in macroblocks */
+define(`transform_8x8_ub', `inline_reg0.2') /* transform_8x8 flag */
+define(`mtype_ub', `inline_reg0.3') /* 0: INTRA, 1: INTER */
+define(`mb_x', `inline_reg0.4') /* x of the next macroblock, accessed as a byte by the kernel */
+define(`mb_y', `inline_reg0.5') /* y of the next macroblock, accessed as a byte by the kernel */
+define(`mb_xy', `inline_reg0.4') /* same offset as mb_x: the kernel reads x and y together as one word */
+define(`total_mbs', `inline_reg0.6') /* the number of macroblock commands
+ * being processed by the kernel
+ */
+define(`last_object', `inline_reg0.8') /* the last object flag */
+/*
+ * GRF 8~15 -- temporary registers
+ */
+define(`tmp_reg0', `r8') /* block-write message header: byte 8 holds the write offset, byte 20 the dispatch id */
+define(`tmp_reg1', `r9')
+define(`tmp_reg2', `r10')
+define(`tmp_reg3', `r11')
+define(`tmp_reg4', `r12')
+define(`tmp_reg5', `r13')
+define(`tmp_reg6', `r14')
+define(`tmp_reg7', `r15')
+
+/*
+ * GRF 16 -- MEDIA_OBJECT command template, 8 dwords at byte offsets 0..28
+ */
+define(`media_object_ud', `r16.0')
+define(`media_object0_ud', `r16.0')
+define(`media_object1_ud', `r16.4')
+define(`media_object2_ud', `r16.8')
+define(`media_object3_ud', `r16.12')
+define(`media_object4_ud', `r16.16')
+define(`media_object5_ud', `r16.20')
+define(`media_object6_ud', `r16.24')
+define(`media_object6_xy', `r16.24') /* low word of dword 6: receives the mb_xy word */
+define(`media_object6_width', `r16.26') /* high word of dword 6: receives width_in_mb */
+define(`media_object7_ud', `r16.28')
+
+/*
+ * GRF 17 -- count of commands left over after the groups of four
+ */
+define(`remainder_cmds', `r17.0')
+
+/*
+ * Write back for Oword Block Write message: none, destination is null and the response length is 0
+ */
+
+/*
+ * write commit is removed on Ivybridge
+ */
+define(`obw_wb', `null<1>:W')
+define(`obw_wb_length', `0')
+
+
+/*
+ * Message Payload registers
+ */
+define(`msg_ind', `64') /* number of the first payload register, passed as the first send operand */
+define(`msg_reg0', `g64')
+define(`msg_reg1', `g65')
+define(`msg_reg2', `g66')
+define(`msg_reg3', `g67')
+define(`msg_reg4', `g68')
+define(`msg_reg5', `g69')
+define(`msg_reg6', `g70')
+define(`msg_reg7', `g71')
+define(`msg_reg8', `g72')
-- 
2.7.4