From 17be8ff554b8e708c2b7eda504a5060ee4bff2cc Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 9 Jun 2011 13:13:24 +0800 Subject: [PATCH] i965_drv_video: new shaders for VME on Ivybridge Signed-off-by: Xiang, Haihao --- i965_drv_video/shaders/vme/Makefile.am | 31 +++- .../vme/{vme_header.inc => gen6_vme_header.inc} | 36 ++++- i965_drv_video/shaders/vme/gen7_vme_header.inc | 162 +++++++++++++++++++++ i965_drv_video/shaders/vme/inter_frame.asm | 45 ++++-- i965_drv_video/shaders/vme/inter_frame.g6a | 2 + i965_drv_video/shaders/vme/inter_frame.g6b | 1 + i965_drv_video/shaders/vme/inter_frame.g7a | 2 + i965_drv_video/shaders/vme/inter_frame.g7b | 28 ++++ i965_drv_video/shaders/vme/intra_frame.asm | 56 +++++-- i965_drv_video/shaders/vme/intra_frame.g6a | 3 + i965_drv_video/shaders/vme/intra_frame.g6b | 5 +- i965_drv_video/shaders/vme/intra_frame.g7a | 2 + i965_drv_video/shaders/vme/intra_frame.g7b | 47 ++++++ 13 files changed, 383 insertions(+), 37 deletions(-) rename i965_drv_video/shaders/vme/{vme_header.inc => gen6_vme_header.inc} (76%) create mode 100644 i965_drv_video/shaders/vme/gen7_vme_header.inc create mode 100644 i965_drv_video/shaders/vme/inter_frame.g6a create mode 100644 i965_drv_video/shaders/vme/inter_frame.g7a create mode 100644 i965_drv_video/shaders/vme/inter_frame.g7b create mode 100644 i965_drv_video/shaders/vme/intra_frame.g6a create mode 100644 i965_drv_video/shaders/vme/intra_frame.g7a create mode 100644 i965_drv_video/shaders/vme/intra_frame.g7b diff --git a/i965_drv_video/shaders/vme/Makefile.am b/i965_drv_video/shaders/vme/Makefile.am index 2a8175f..e1097a7 100644 --- a/i965_drv_video/shaders/vme/Makefile.am +++ b/i965_drv_video/shaders/vme/Makefile.am @@ -1,20 +1,37 @@ +VME_CORE = intra_frame.asm inter_frame.asm + INTEL_G6B = intra_frame.g6b inter_frame.g6b -INTEL_INC = vme_header.inc +INTEL_G6A = intra_frame.g6a inter_frame.g6a +INTEL_INC = gen6_vme_header.inc + +INTEL_G7B = intra_frame.g7b inter_frame.g7b +INTEL_G7A = intra_frame.g7a 
inter_frame.g7a +INTEL_INC_GEN7 = gen7_vme_header.inc EXTRA_DIST = $(INTEL_G6B) \ - $(INTEL_INC) + $(INTEL_G6A) \ + $(INTEL_INC) \ + $(INTEL_G7B) \ + $(INTEL_G7A) \ + $(INTEL_INC_GEN7) if HAVE_GEN4ASM -SUFFIXES = .asm .g6b -.asm.g6b: - m4 $*.asm > $*.g6m && intel-gen4asm -g 6 -o $@ $*.g6m && rm $*.g6m +SUFFIXES = .g6a .g6b .g7a .g7b +.g6a.g6b: + m4 $*.g6a > $*.g6m && intel-gen4asm -g 6 -o $@ $*.g6m && rm $*.g6m + +.g7a.g7b: + m4 $*.g7a > $*.g7m && intel-gen4asm -g 7 -o $@ $*.g7m && rm $*.g7m + +$(INTEL_G6B): $(INTEL_INC) $(VME_CORE) -$(INTEL_G6B): $(INTEL_INC) +$(INTEL_G7B): $(INTEL_INC_GEN7) $(VME_CORE) -BUILT_SOURCES= $(INTEL_G6B) +BUILT_SOURCES= $(INTEL_G6B) $(INTEL_G7B) clean-local: -rm -f $(INTEL_G6B) + -rm -f $(INTEL_G7B) endif diff --git a/i965_drv_video/shaders/vme/vme_header.inc b/i965_drv_video/shaders/vme/gen6_vme_header.inc similarity index 76% rename from i965_drv_video/shaders/vme/vme_header.inc rename to i965_drv_video/shaders/vme/gen6_vme_header.inc index a66b71f..1fad53b 100644 --- a/i965_drv_video/shaders/vme/vme_header.inc +++ b/i965_drv_video/shaders/vme/gen6_vme_header.inc @@ -46,16 +46,31 @@ define(`INTER_SAD_HAAR', `0x00200000') define(`INTRA_SAD_NONE', `0x00000000') define(`INTRA_SAD_HAAR', `0x00800000') -define(`INTER_PART_MASK', `0x7E000000') +define(`INTER_PART_MASK', `0x7E000000') define(`REF_REGION_SIZE', `0x2020:UW') define(`BI_SUB_MB_PART_MASK', `0x0c000000') define(`MAX_NUM_MV', `0x00000020') -define(`SEARCH_PATH_LEN', `0x00003F3F') +define(`SEARCH_PATH_LEN', `0x00003F3F') define(`INTRA_PREDICTORE_MODE', `0x11111111:UD') - + +define(`OBW_CACHE_TYPE', `5') + +define(`OBW_MESSAGE_TYPE', `8') + +define(`OBW_BIND_IDX', `BIND_IDX_OUTPUT') + +define(`OBW_CONTROL_0', `0') /* 1 OWord, low 128 bits */ +define(`OBW_CONTROL_1', `1') /* 1 OWord, high 128 bits */ +define(`OBW_CONTROL_2', `2') /* 2 OWords */ +define(`OBW_CONTROL_3', `3') /* 4 OWords */ + +define(`OBW_WRITE_COMMIT_CATEGORY', `1') /* write commit on Sandybridge */ + 
+define(`OBW_HEADER_PRESENT', `1') + /* GRF registers * r0 header * r1~r4 constant buffer (reserved) @@ -99,6 +114,7 @@ define(`vme_wb3', `r15') * GRF 16 -- write back for Oword Block Write message with write commit bit */ define(`obw_wb', `r16') +define(`obw_wb_length', `1') /* * GRF 18~21 -- Intra Neighbor Edge Pixels @@ -121,12 +137,22 @@ define(`tmp_reg3', `r35') /* * MRF registers */ +define(`msg_ind', `0') define(`msg_reg0', `m0') /* m0 */ define(`msg_reg1', `m1') /* m1 */ define(`msg_reg2', `m2') /* m2 */ define(`msg_reg3', `m3') /* m3 */ +define(`msg_reg4', `m4') /* m4 */ - - +/* + * VME message payload + */ +define(`vme_msg_length', `4') +define(`vme_msg_ind', `msg_ind') +define(`vme_msg_0', `msg_reg0') +define(`vme_msg_1', `msg_reg1') +define(`vme_msg_2', `msg_reg2') +define(`vme_msg_3', `vme_msg_2') +define(`vme_msg_4', `msg_reg3') diff --git a/i965_drv_video/shaders/vme/gen7_vme_header.inc b/i965_drv_video/shaders/vme/gen7_vme_header.inc new file mode 100644 index 0000000..c443acd --- /dev/null +++ b/i965_drv_video/shaders/vme/gen7_vme_header.inc @@ -0,0 +1,162 @@ +/* + * Copyright © <2010>, Intel Corporation. + * + * This program is licensed under the terms and conditions of the + * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at + * http://www.opensource.org/licenses/eclipse-1.0.php. 
+ * + */ +// Module name: ME_header.inc +// +// Global symbols define +// + +/* + * Constant + */ +define(`VME_MESSAGE_TYPE_INTER', `1') +define(`VME_MESSAGE_TYPE_INTRA', `2') +define(`VME_MESSAGE_TYPE_MIXED', `3') + +define(`BLOCK_32X1', `0x0000001F') +define(`BLOCK_4X16', `0x000F0003') + +define(`LUMA_INTRA_16x16_DISABLE', `0x1') +define(`LUMA_INTRA_8x8_DISABLE', `0x2') +define(`LUMA_INTRA_4x4_DISABLE', `0x4') + +define(`INTRA_PRED_AVAIL_FLAG_AE', `0x60') +define(`INTRA_PRED_AVAIL_FLAG_B', `0x10') +define(`INTRA_PRED_AVAIL_FLAG_C', `0x8') +define(`INTRA_PRED_AVAIL_FLAG_D', `0x4') + +define(`BIND_IDX_VME', `1') +define(`BIND_IDX_VME_REF0', `2') +define(`BIND_IDX_VME_REF1', `3') +define(`BIND_IDX_OUTPUT', `0') +define(`BIND_IDX_INEP', `4') + +define(`SUB_PEL_MODE_INTEGER', `0x00000000') +define(`SUB_PEL_MODE_HALF', `0x00001000') +define(`SUB_PEL_MODE_QUARTER', `0x00003000') + +define(`INTER_SAD_NONE', `0x00000000') +define(`INTER_SAD_HAAR', `0x00200000') + +define(`INTRA_SAD_NONE', `0x00000000') +define(`INTRA_SAD_HAAR', `0x00800000') + +define(`INTER_PART_MASK', `0x7E000000') + +define(`REF_REGION_SIZE', `0x2020:UW') + +define(`BI_SUB_MB_PART_MASK', `0x0c000000') +define(`MAX_NUM_MV', `0x00000020') +define(`SEARCH_PATH_LEN', `0x00003F3F') + +define(`INTRA_PREDICTORE_MODE', `0x11111111:UD') + +define(`OBW_CACHE_TYPE', `10') + +define(`OBW_MESSAGE_TYPE', `8') + +define(`OBW_BIND_IDX', `BIND_IDX_OUTPUT') + +define(`OBW_CONTROL_0', `0') /* 1 OWord, low 128 bits */ +define(`OBW_CONTROL_1', `1') /* 1 OWord, high 128 bits */ +define(`OBW_CONTROL_2', `2') /* 2 OWords */ +define(`OBW_CONTROL_3', `3') /* 4 OWords */ + +define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */ + +define(`OBW_HEADER_PRESENT', `1') + +/* GRF registers + * r0 header + * r1~r4 constant buffer (reserved) + * r5 inline data + * r6~r11 reserved + * r12 write back of VME message + * r13 write back of Oword Block Write + */ +/* + * GRF 0 -- header + */ +define(`thread_id_ub', 
`r0.20<0,1,0>:UB') /* thread id in payload */ + +/* + * GRF 1~4 -- Constant Buffer (reserved) + */ + +/* + * GRF 5 -- inline data + */ +define(`inline_reg0', `r5') +define(`w_in_mb_uw', `inline_reg0.2') +define(`orig_xy_ub', `inline_reg0.0') +define(`orig_x_ub', `inline_reg0.0') /* in macroblock */ +define(`orig_y_ub', `inline_reg0.1') + +/* + * GRF 6~11 -- reserved + */ + +/* + * GRF 12~15 -- write back for VME message + */ +define(`vme_wb', `r12') +define(`vme_wb0', `r12') +define(`vme_wb1', `r13') +define(`vme_wb2', `r14') +define(`vme_wb3', `r15') + +/* + * GRF 16 -- reserved + */ +/* + * write commit is removed on Ivybridge + */ +define(`obw_wb', `null<1>:W') +define(`obw_wb_length', `0') +/* + * GRF 18~21 -- Intra Neighbor Edge Pixels + */ +define(`INEP_ROW', `r18') +define(`INEP_COL0', `r20') +define(`INEP_COL1', `r21') + +/* + * temporary registers + */ +define(`tmp_reg0', `r32') +define(`tmp_reg1', `r33') +define(`intra_part_mask_ub', `tmp_reg1.28') +define(`mb_intra_struct_ub', `tmp_reg1.29') +define(`tmp_reg2', `r34') +define(`tmp_x_w', `tmp_reg2.0') +define(`tmp_reg3', `r35') + +/* + * Message Payload registers + */ +define(`msg_ind', `64') +define(`msg_reg0', `g64') +define(`msg_reg1', `g65') +define(`msg_reg2', `g66') +define(`msg_reg3', `g67') +define(`msg_reg4', `g68') + +/* + * VME message payload + */ +define(`vme_msg_length', `5') +define(`vme_msg_ind', `msg_ind') +define(`vme_msg_0', `msg_reg0') +define(`vme_msg_1', `msg_reg1') +define(`vme_msg_2', `msg_reg2') +define(`vme_msg_3', `msg_reg3') +define(`vme_msg_4', `msg_reg4') + + + + diff --git a/i965_drv_video/shaders/vme/inter_frame.asm b/i965_drv_video/shaders/vme/inter_frame.asm index 4fa539a..4dd9401 100644 --- a/i965_drv_video/shaders/vme/inter_frame.asm +++ b/i965_drv_video/shaders/vme/inter_frame.asm @@ -15,8 +15,6 @@ // Now, begin source code.... 
// -include(`vme_header.inc') - /* * __START */ @@ -35,22 +33,37 @@ mov (1) tmp_reg0.12<1>:UD INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_ mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ mov (1) tmp_reg0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 32x32 */ -mov (8) msg_reg0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1}; +mov (8) vme_msg_0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1}; /* m1 */ mov (1) tmp_reg1.4<1>:UD MAX_NUM_MV:UD {align1}; /* Default value MAX 32 MVs */ mov (1) tmp_reg1.8<1>:UD SEARCH_PATH_LEN:UD {align1}; -mov (8) msg_reg1<1>:UD tmp_reg1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_1<1>:UD tmp_reg1.0<8,8,1>:UD {align1}; /* m2 */ -mov (8) msg_reg2<1>:UD 0x0:UD {align1}; +mov (8) vme_msg_2<1>:UD 0x0:UD {align1}; /* m3 */ -mov (8) msg_reg3<1>:UD 0x0:UD {align1}; +mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; -send (8) 0 vme_wb null vme(BIND_IDX_VME,0,0,VME_MESSAGE_TYPE_INTER) mlen 4 rlen 4 {align1}; +/* m4 */ +mov (8) vme_msg_4<1>:UD 0x0:UD {align1}; +send (8) + vme_msg_ind + vme_wb + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_MESSAGE_TYPE_INTER + ) + mlen vme_msg_length + rlen 4 + {align1}; + /* * Oword Block Write message */ @@ -68,10 +81,24 @@ mov (8) msg_reg1.0<1>:UD tmp_reg3.0<8,8,0>:UD {align1}; mov (8) msg_reg2.0<1>:UD tmp_reg3.0<8,8,0>:UD {align1}; /* bind index 3, write 4 oword, msg type: 8(OWord Block Write) */ -send (16) 0 obw_wb null write(BIND_IDX_OUTPUT, 3, 8, 1) mlen 3 rlen 1 {align1}; +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_3, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 3 + rlen obw_wb_length + {align1}; /* * kill thread */ mov (8) msg_reg0<1>:UD r0<8,8,1>:UD {align1}; -send (16) 0 acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; +send (16) msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; diff --git a/i965_drv_video/shaders/vme/inter_frame.g6a 
b/i965_drv_video/shaders/vme/inter_frame.g6a new file mode 100644 index 0000000..d89588f --- /dev/null +++ b/i965_drv_video/shaders/vme/inter_frame.g6a @@ -0,0 +1,2 @@ +include(`gen6_vme_header.inc') +include(`inter_frame.asm') diff --git a/i965_drv_video/shaders/vme/inter_frame.g6b b/i965_drv_video/shaders/vme/inter_frame.g6b index 915c8f6..05ee798 100644 --- a/i965_drv_video/shaders/vme/inter_frame.g6b +++ b/i965_drv_video/shaders/vme/inter_frame.g6b @@ -11,6 +11,7 @@ { 0x00000001, 0x24280061, 0x00000000, 0x00003f3f }, { 0x00600001, 0x20200022, 0x008d0420, 0x00000000 }, { 0x00600001, 0x20400062, 0x00000000, 0x00000000 }, + { 0x00600001, 0x20400062, 0x00000000, 0x00000000 }, { 0x00600001, 0x20600062, 0x00000000, 0x00000000 }, { 0x08600031, 0x21801cdd, 0x00000000, 0x08482000 }, { 0x00000041, 0x24684521, 0x000000a2, 0x000000a1 }, diff --git a/i965_drv_video/shaders/vme/inter_frame.g7a b/i965_drv_video/shaders/vme/inter_frame.g7a new file mode 100644 index 0000000..cb51f52 --- /dev/null +++ b/i965_drv_video/shaders/vme/inter_frame.g7a @@ -0,0 +1,2 @@ +include(`gen7_vme_header.inc') +include(`inter_frame.asm') diff --git a/i965_drv_video/shaders/vme/inter_frame.g7b b/i965_drv_video/shaders/vme/inter_frame.g7b new file mode 100644 index 0000000..660721d --- /dev/null +++ b/i965_drv_video/shaders/vme/inter_frame.g7b @@ -0,0 +1,28 @@ + { 0x00800001, 0x24000061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24400061, 0x00000000, 0x00000000 }, + { 0x00200041, 0x24082e29, 0x004500a0, 0x00100010 }, + { 0x00200041, 0x24002e29, 0x004500a0, 0x00100010 }, + { 0x00200040, 0x24003dad, 0x00450400, 0xfff8fff8 }, + { 0x00000001, 0x240c0061, 0x00000000, 0x7e203000 }, + { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x24160169, 0x00000000, 0x20202020 }, + { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 }, + { 0x00000001, 0x24240061, 0x00000000, 0x00000020 }, + { 0x00000001, 0x24280061, 0x00000000, 0x00003f3f }, + { 0x00600001, 0x28200021, 0x008d0420, 0x00000000 }, + 
{ 0x00600001, 0x28400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28600061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28800061, 0x00000000, 0x00000000 }, + { 0x08600031, 0x21801cbd, 0x00000800, 0x0a482001 }, + { 0x00000041, 0x24684521, 0x000000a2, 0x000000a1 }, + { 0x00000040, 0x24684421, 0x00000468, 0x000000a0 }, + { 0x00000041, 0x24680c21, 0x00000468, 0x00000004 }, + { 0x00000001, 0x24740231, 0x00000014, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0460, 0x00000000 }, + { 0x00200001, 0x24600229, 0x004501a0, 0x00000000 }, + { 0x00200040, 0x24603dad, 0x00b10460, 0xffe0ffe0 }, + { 0x00600001, 0x28200021, 0x008c0460, 0x00000000 }, + { 0x00600001, 0x28400021, 0x008c0460, 0x00000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x060a0300 }, + { 0x00600001, 0x28000021, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x24001ca8, 0x00000800, 0x82000010 }, diff --git a/i965_drv_video/shaders/vme/intra_frame.asm b/i965_drv_video/shaders/vme/intra_frame.asm index 65cd58b..2d2e688 100644 --- a/i965_drv_video/shaders/vme/intra_frame.asm +++ b/i965_drv_video/shaders/vme/intra_frame.asm @@ -15,8 +15,6 @@ // Now, begin source code.... 
// -include(`vme_header.inc') - /* * __START */ @@ -34,7 +32,7 @@ add (1) tmp_reg0.4<1>:D tmp_reg0.4<0,1,0>:D -1:W {align1}; /* Y offs mov (1) tmp_reg0.8<1>:UD BLOCK_32X1 {align1}; mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ mov (8) msg_reg0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1}; -send (16) 0 INEP_ROW null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1}; +send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1}; /* COL */ mul (2) tmp_reg0.0<1>:D orig_xy_ub<2,2,1>:UB 16:UW {align1}; /* (x, y) * 16 */ @@ -42,7 +40,7 @@ add (1) tmp_reg0.0<1>:D tmp_reg0.0<0,1,0>:D -4:W {align1}; /* X offs mov (1) tmp_reg0.8<1>:UD BLOCK_4X16 {align1}; mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ mov (8) msg_reg0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1}; -send (16) 0 INEP_COL0 null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1}; +send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1}; /* * VME message @@ -50,7 +48,7 @@ send (16) 0 INEP_COL0 null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1}; /* m0 */ mul (2) tmp_reg0.8<1>:UW orig_xy_ub<2,2,1>:UB 16:UW {align1}; /* (x, y) * 16 */ mov (1) tmp_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ -mov (8) msg_reg0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1}; +mov (8) vme_msg_0.0<1>:UD tmp_reg0.0<8,8,1>:UD {align1}; /* m1 */ mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE + LUMA_INTRA_4x4_DISABLE {align1}; @@ -69,16 +67,32 @@ add (1) tmp_x_w<1>:W w_in_mb_uw<0,1,0>:UW -tmp_x_w<0,1,0>:W {align1}; mul.nz.f0.0 (1) null<1>:UD tmp_x_w<0,1,0>:W orig_y_ub<0,1,0>:UB {align1}; /* (width - (X + 1)) * Y != 0 */ (f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_C {align1}; /* C */ -mov (8) msg_reg1<1>:UD tmp_reg1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_1<1>:UD tmp_reg1.0<8,8,1>:UD {align1}; /* m2 */ -mov (8) msg_reg2<1>:UD INEP_ROW.0<8,8,1>:UD {align1}; - +mov (8) vme_msg_2<1>:UD 0x0:UD 
{align1}; + /* m3 */ -mov (8) msg_reg3<1>:UD 0x0 {align1}; -mov (16) msg_reg3.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1}; -mov (1) msg_reg3.16<1>:UD INTRA_PREDICTORE_MODE {align1}; -send (8) 0 vme_wb null vme(BIND_IDX_VME,0,0,VME_MESSAGE_TYPE_INTRA) mlen 4 rlen 1 {align1}; +mov (8) vme_msg_3<1>:UD INEP_ROW.0<8,8,1>:UD {align1}; + +/* m4 */ +mov (8) vme_msg_4<1>:UD 0x0 {align1}; +mov (16) vme_msg_4.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1}; +mov (1) vme_msg_4.16<1>:UD INTRA_PREDICTORE_MODE {align1}; + +send (8) + vme_msg_ind + vme_wb + null + vme( + BIND_IDX_VME, + 0, + 0, + VME_MESSAGE_TYPE_INTRA + ) + mlen vme_msg_length + rlen 1 + {align1}; /* * Oword Block Write message @@ -93,10 +107,24 @@ mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1}; mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1}; /* bind index 3, write 1 oword, msg type: 8(OWord Block Write) */ -send (16) 0 obw_wb null write(BIND_IDX_OUTPUT, 0, 8, 1) mlen 2 rlen 1 {align1}; +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_0, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; /* * kill thread */ mov (8) msg_reg0<1>:UD r0<8,8,1>:UD {align1}; -send (16) 0 acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; +send (16) msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; diff --git a/i965_drv_video/shaders/vme/intra_frame.g6a b/i965_drv_video/shaders/vme/intra_frame.g6a new file mode 100644 index 0000000..d39118c --- /dev/null +++ b/i965_drv_video/shaders/vme/intra_frame.g6a @@ -0,0 +1,3 @@ +include(`gen6_vme_header.inc') +include(`intra_frame.asm') + diff --git a/i965_drv_video/shaders/vme/intra_frame.g6b b/i965_drv_video/shaders/vme/intra_frame.g6b index c5fc865..90ee252 100644 --- a/i965_drv_video/shaders/vme/intra_frame.g6b +++ b/i965_drv_video/shaders/vme/intra_frame.g6b @@ -6,13 +6,13 @@ { 
0x00000001, 0x240800e1, 0x00000000, 0x0000001f }, { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 }, - { 0x04800031, 0x22401cdd, 0x00000000, 0x02188004 }, + { 0x04600031, 0x22401cd1, 0x00000000, 0x02188004 }, { 0x00200041, 0x24002e25, 0x004500a0, 0x00100010 }, { 0x00000040, 0x24003ca5, 0x00000400, 0xfffcfffc }, { 0x00000001, 0x240800e1, 0x00000000, 0x000f0003 }, { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 }, - { 0x04800031, 0x22801cdd, 0x00000000, 0x02288004 }, + { 0x04600031, 0x22801cd1, 0x00000000, 0x02288004 }, { 0x00200041, 0x24082e29, 0x004500a0, 0x00100010 }, { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 }, @@ -28,6 +28,7 @@ { 0x02000041, 0x200045a0, 0x00000440, 0x000000a1 }, { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000008 }, { 0x00600001, 0x20200022, 0x008d0420, 0x00000000 }, + { 0x00600001, 0x20400062, 0x00000000, 0x00000000 }, { 0x00600001, 0x20400022, 0x008d0240, 0x00000000 }, { 0x00600001, 0x206000e2, 0x00000000, 0x00000000 }, { 0x00800001, 0x20600232, 0x00cf0283, 0x00000000 }, diff --git a/i965_drv_video/shaders/vme/intra_frame.g7a b/i965_drv_video/shaders/vme/intra_frame.g7a new file mode 100644 index 0000000..c43e739 --- /dev/null +++ b/i965_drv_video/shaders/vme/intra_frame.g7a @@ -0,0 +1,2 @@ +include(`gen7_vme_header.inc') +include(`intra_frame.asm') diff --git a/i965_drv_video/shaders/vme/intra_frame.g7b b/i965_drv_video/shaders/vme/intra_frame.g7b new file mode 100644 index 0000000..cc063d8 --- /dev/null +++ b/i965_drv_video/shaders/vme/intra_frame.g7b @@ -0,0 +1,47 @@ + { 0x00800001, 0x24000061, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24400061, 0x00000000, 0x00000000 }, + { 0x00200041, 0x24002e25, 0x004500a0, 0x00100010 }, + { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 }, + { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff }, + { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f 
}, + { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x22401cb1, 0x00000800, 0x02190004 }, + { 0x00200041, 0x24002e25, 0x004500a0, 0x00100010 }, + { 0x00000040, 0x24003ca5, 0x00000400, 0xfffcfffc }, + { 0x00000001, 0x240800e1, 0x00000000, 0x000f0003 }, + { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x22801cb1, 0x00000800, 0x02290004 }, + { 0x00200041, 0x24082e29, 0x004500a0, 0x00100010 }, + { 0x00000001, 0x24140231, 0x00000014, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 }, + { 0x00000001, 0x243c00f1, 0x00000000, 0x00000006 }, + { 0x02000010, 0x20002e28, 0x000000a0, 0x00000000 }, + { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000060 }, + { 0x02000010, 0x20002e28, 0x000000a1, 0x00000000 }, + { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000010 }, + { 0x02000041, 0x20004628, 0x000000a0, 0x000000a1 }, + { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000004 }, + { 0x00000040, 0x24402e2d, 0x000000a0, 0x00010001 }, + { 0x00000040, 0x2440352d, 0x000000a2, 0x00004440 }, + { 0x02000041, 0x200045a0, 0x00000440, 0x000000a1 }, + { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000008 }, + { 0x00600001, 0x28200021, 0x008d0420, 0x00000000 }, + { 0x00600001, 0x28400061, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28600021, 0x008d0240, 0x00000000 }, + { 0x00600001, 0x288000e1, 0x00000000, 0x00000000 }, + { 0x00800001, 0x28800231, 0x00cf0283, 0x00000000 }, + { 0x00000001, 0x28900061, 0x00000000, 0x11111111 }, + { 0x08600031, 0x21801cbd, 0x00000800, 0x0a184001 }, + { 0x00000041, 0x24684521, 0x000000a2, 0x000000a1 }, + { 0x00000040, 0x24684421, 0x00000468, 0x000000a0 }, + { 0x00000001, 0x24740231, 0x00000014, 0x00000000 }, + { 0x00600001, 0x28000021, 0x008d0460, 0x00000000 }, + { 0x00000001, 0x28200021, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240021, 0x00000190, 0x00000000 }, + { 0x00000001, 0x28280021, 0x00000194, 0x00000000 
}, + { 0x00000001, 0x282c0021, 0x00000198, 0x00000000 }, + { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0000 }, + { 0x00600001, 0x28000021, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x24001ca8, 0x00000800, 0x82000010 }, -- 2.7.4