i965_drv_video: new shaders for VME on Ivybridge
author: Xiang, Haihao <haihao.xiang@intel.com>
Thu, 9 Jun 2011 05:13:24 +0000 (13:13 +0800)
committer: Xiang, Haihao <haihao.xiang@intel.com>
Fri, 10 Jun 2011 03:08:42 +0000 (11:08 +0800)
Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
13 files changed:
i965_drv_video/shaders/vme/Makefile.am
i965_drv_video/shaders/vme/gen6_vme_header.inc [moved from i965_drv_video/shaders/vme/vme_header.inc with 76% similarity]
i965_drv_video/shaders/vme/gen7_vme_header.inc [new file with mode: 0644]
i965_drv_video/shaders/vme/inter_frame.asm
i965_drv_video/shaders/vme/inter_frame.g6a [new file with mode: 0644]
i965_drv_video/shaders/vme/inter_frame.g6b
i965_drv_video/shaders/vme/inter_frame.g7a [new file with mode: 0644]
i965_drv_video/shaders/vme/inter_frame.g7b [new file with mode: 0644]
i965_drv_video/shaders/vme/intra_frame.asm
i965_drv_video/shaders/vme/intra_frame.g6a [new file with mode: 0644]
i965_drv_video/shaders/vme/intra_frame.g6b
i965_drv_video/shaders/vme/intra_frame.g7a [new file with mode: 0644]
i965_drv_video/shaders/vme/intra_frame.g7b [new file with mode: 0644]

index 2a8175f..e1097a7 100644 (file)
@@ -1,20 +1,37 @@
+VME_CORE  = intra_frame.asm inter_frame.asm
+
 INTEL_G6B = intra_frame.g6b inter_frame.g6b
-INTEL_INC = vme_header.inc
+INTEL_G6A = intra_frame.g6a inter_frame.g6a
+INTEL_INC = gen6_vme_header.inc
+
+INTEL_G7B = intra_frame.g7b inter_frame.g7b
+INTEL_G7A = intra_frame.g7a inter_frame.g7a
+INTEL_INC_GEN7 = gen7_vme_header.inc
 
 EXTRA_DIST = $(INTEL_G6B)      \
-       $(INTEL_INC)
+       $(INTEL_G6A)            \
+       $(INTEL_INC)            \
+       $(INTEL_G7B)            \
+       $(INTEL_G7A)            \
+       $(INTEL_INC_GEN7)
 
 if HAVE_GEN4ASM
 
-SUFFIXES = .asm .g6b
-.asm.g6b:
-       m4 $*.asm > $*.g6m && intel-gen4asm -g 6 -o $@ $*.g6m && rm $*.g6m
+SUFFIXES = .g6a .g6b .g7a .g7b
+.g6a.g6b:
+       m4 $*.g6a > $*.g6m && intel-gen4asm -g 6 -o $@ $*.g6m && rm $*.g6m
+
+.g7a.g7b:
+       m4 $*.g7a > $*.g7m && intel-gen4asm -g 7 -o $@ $*.g7m && rm $*.g7m
+
+$(INTEL_G6B): $(INTEL_INC) $(VME_CORE)
 
-$(INTEL_G6B): $(INTEL_INC)
+$(INTEL_G7B): $(INTEL_INC_GEN7) $(VME_CORE)
 
-BUILT_SOURCES= $(INTEL_G6B)
+BUILT_SOURCES= $(INTEL_G6B) $(INTEL_G7B)
 
 clean-local:
        -rm -f $(INTEL_G6B)
+       -rm -f $(INTEL_G7B)
 
 endif    
similarity index 76%
rename from i965_drv_video/shaders/vme/vme_header.inc
rename to i965_drv_video/shaders/vme/gen6_vme_header.inc
index a66b71f..1fad53b 100644 (file)
@@ -46,16 +46,31 @@ define(`INTER_SAD_HAAR',                `0x00200000')
 define(`INTRA_SAD_NONE',                `0x00000000')
 define(`INTRA_SAD_HAAR',                `0x00800000')
 
-define(`INTER_PART_MASK',                              `0x7E000000')
+define(`INTER_PART_MASK',               `0x7E000000')
 
 define(`REF_REGION_SIZE',               `0x2020:UW')
 
 define(`BI_SUB_MB_PART_MASK',           `0x0c000000')
 define(`MAX_NUM_MV',                    `0x00000020')
-define(`SEARCH_PATH_LEN',                              `0x00003F3F')
+define(`SEARCH_PATH_LEN',               `0x00003F3F')
  
 define(`INTRA_PREDICTORE_MODE',         `0x11111111:UD')
-        
+
+define(`OBW_CACHE_TYPE',                `5')
+
+define(`OBW_MESSAGE_TYPE',              `8')
+
+define(`OBW_BIND_IDX',                  `BIND_IDX_OUTPUT')
+
+define(`OBW_CONTROL_0',                 `0')    /* 1 OWord, low 128 bits */
+define(`OBW_CONTROL_1',                 `1')    /* 1 OWord, high 128 bits */
+define(`OBW_CONTROL_2',                 `2')    /* 2 OWords */
+define(`OBW_CONTROL_3',                 `3')    /* 4 OWords */
+
+define(`OBW_WRITE_COMMIT_CATEGORY',     `1')    /* write commit on Sandybridge */
+
+define(`OBW_HEADER_PRESENT',            `1')
+
 /* GRF registers
  * r0 header
  * r1~r4 constant buffer (reserved)
@@ -99,6 +114,7 @@ define(`vme_wb3',               `r15')
  * GRF 16 -- write back for Oword Block Write message with write commit bit
  */        
 define(`obw_wb',                `r16')
+define(`obw_wb_length',         `1')
 
 /*
  * GRF 18~21 -- Intra Neighbor Edge Pixels
@@ -121,12 +137,22 @@ define(`tmp_reg3',              `r35')
 /*
  * MRF registers
  */        
+define(`msg_ind',               `0')
 define(`msg_reg0',              `m0')               /* m0 */
 define(`msg_reg1',              `m1')               /* m1 */
 define(`msg_reg2',              `m2')               /* m2 */
 define(`msg_reg3',              `m3')               /* m3 */
+define(`msg_reg4',              `m4')               /* m4 */
 
-
-
+/*
+ * VME message payload
+ */
+define(`vme_msg_length',        `4')
+define(`vme_msg_ind',           `msg_ind')
+define(`vme_msg_0',             `msg_reg0')
+define(`vme_msg_1',             `msg_reg1')
+define(`vme_msg_2',             `msg_reg2')
+define(`vme_msg_3',             `vme_msg_2')
+define(`vme_msg_4',             `msg_reg3')
 
 
diff --git a/i965_drv_video/shaders/vme/gen7_vme_header.inc b/i965_drv_video/shaders/vme/gen7_vme_header.inc
new file mode 100644 (file)
index 0000000..c443acd
--- /dev/null
@@ -0,0 +1,162 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+// Module name: gen7_vme_header.inc
+//
+// Global symbols define
+//
+
+/*
+ * Constant
+ */
+define(`VME_MESSAGE_TYPE_INTER',        `1')
+define(`VME_MESSAGE_TYPE_INTRA',        `2')
+define(`VME_MESSAGE_TYPE_MIXED',        `3')
+        
+define(`BLOCK_32X1',                    `0x0000001F')
+define(`BLOCK_4X16',                    `0x000F0003')
+        
+define(`LUMA_INTRA_16x16_DISABLE',      `0x1')
+define(`LUMA_INTRA_8x8_DISABLE',        `0x2')
+define(`LUMA_INTRA_4x4_DISABLE',        `0x4')
+
+define(`INTRA_PRED_AVAIL_FLAG_AE',      `0x60')
+define(`INTRA_PRED_AVAIL_FLAG_B',       `0x10')
+define(`INTRA_PRED_AVAIL_FLAG_C',       `0x8')
+define(`INTRA_PRED_AVAIL_FLAG_D',       `0x4')
+
+define(`BIND_IDX_VME',                  `1')
+define(`BIND_IDX_VME_REF0',             `2')
+define(`BIND_IDX_VME_REF1',             `3')
+define(`BIND_IDX_OUTPUT',               `0')
+define(`BIND_IDX_INEP',                 `4')
+
+define(`SUB_PEL_MODE_INTEGER',          `0x00000000')
+define(`SUB_PEL_MODE_HALF',             `0x00001000')
+define(`SUB_PEL_MODE_QUARTER',          `0x00003000')
+
+define(`INTER_SAD_NONE',                `0x00000000')
+define(`INTER_SAD_HAAR',                `0x00200000')
+
+define(`INTRA_SAD_NONE',                `0x00000000')
+define(`INTRA_SAD_HAAR',                `0x00800000')
+
+define(`INTER_PART_MASK',               `0x7E000000')
+
+define(`REF_REGION_SIZE',               `0x2020:UW')
+
+define(`BI_SUB_MB_PART_MASK',           `0x0c000000')
+define(`MAX_NUM_MV',                    `0x00000020')
+define(`SEARCH_PATH_LEN',               `0x00003F3F')
+        
+define(`INTRA_PREDICTORE_MODE',         `0x11111111:UD')
+
+define(`OBW_CACHE_TYPE',                `10')
+
+define(`OBW_MESSAGE_TYPE',              `8')
+
+define(`OBW_BIND_IDX',                  `BIND_IDX_OUTPUT')
+
+define(`OBW_CONTROL_0',                 `0')    /* 1 OWord, low 128 bits */
+define(`OBW_CONTROL_1',                 `1')    /* 1 OWord, high 128 bits */
+define(`OBW_CONTROL_2',                 `2')    /* 2 OWords */
+define(`OBW_CONTROL_3',                 `3')    /* 4 OWords */
+
+define(`OBW_WRITE_COMMIT_CATEGORY',     `0')    /* category on Ivybridge */
+
+define(`OBW_HEADER_PRESENT',            `1')
+
+/* GRF registers
+ * r0 header
+ * r1~r4 constant buffer (reserved)
+ * r5 inline data
+ * r6~r11 reserved        
+ * r12~r15 write back of VME message
+ * r16 reserved (no write back for Oword Block Write on Ivybridge)
+ */
+/*
+ * GRF 0 -- header       
+ */        
+define(`thread_id_ub',          `r0.20<0,1,0>:UB')  /* thread id in payload */
+
+/*
+ * GRF 1~4 -- Constant Buffer (reserved)
+ */
+        
+/*
+ * GRF 5 -- inline data
+ */        
+define(`inline_reg0',           `r5')
+define(`w_in_mb_uw',            `inline_reg0.2')
+define(`orig_xy_ub',            `inline_reg0.0')
+define(`orig_x_ub',             `inline_reg0.0')    /* in macroblock */    
+define(`orig_y_ub',             `inline_reg0.1')
+
+/*
+ * GRF 6~11 -- reserved
+ */
+
+/*
+ * GRF 12~15 -- write back for VME message 
+ */
+define(`vme_wb',                `r12')
+define(`vme_wb0',               `r12')
+define(`vme_wb1',               `r13')
+define(`vme_wb2',               `r14')
+define(`vme_wb3',               `r15')
+        
+/*
+ * GRF 16 -- reserved
+ */        
+/*
+ * write commit is removed on Ivybridge
+ */
+define(`obw_wb',                `null<1>:W')
+define(`obw_wb_length',         `0')
+/*
+ * GRF 18~21 -- Intra Neighbor Edge Pixels
+ */
+define(`INEP_ROW',              `r18')
+define(`INEP_COL0',             `r20')
+define(`INEP_COL1',             `r21')
+        
+/*
+ * temporary registers
+ */
+define(`tmp_reg0',              `r32')
+define(`tmp_reg1',              `r33')
+define(`intra_part_mask_ub',    `tmp_reg1.28')        
+define(`mb_intra_struct_ub',    `tmp_reg1.29')
+define(`tmp_reg2',              `r34')
+define(`tmp_x_w',               `tmp_reg2.0')
+define(`tmp_reg3',              `r35')
+
+/*
+ * Message Payload registers
+ */
+define(`msg_ind',               `64')
+define(`msg_reg0',              `g64')
+define(`msg_reg1',              `g65')
+define(`msg_reg2',              `g66')
+define(`msg_reg3',              `g67')
+define(`msg_reg4',              `g68')
+
+/*
+ * VME message payload
+ */
+define(`vme_msg_length',        `5')
+define(`vme_msg_ind',           `msg_ind')
+define(`vme_msg_0',             `msg_reg0')
+define(`vme_msg_1',             `msg_reg1')
+define(`vme_msg_2',             `msg_reg2')
+define(`vme_msg_3',             `msg_reg3')
+define(`vme_msg_4',             `msg_reg4')
+
+
+
+
index 4fa539a..4dd9401 100644 (file)
@@ -15,8 +15,6 @@
 //  Now, begin source code....
 //
 
-include(`vme_header.inc')
-
 /*
  * __START
  */
@@ -35,22 +33,37 @@ mov  (1) tmp_reg0.12<1>:UD      INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_
 
 mov  (1) tmp_reg0.20<1>:UB      thread_id_ub {align1};                  /* dispatch id */
 mov  (1) tmp_reg0.22<1>:UW      REF_REGION_SIZE {align1};               /* Reference Width&Height, 32x32 */
-mov  (8) msg_reg0.0<1>:UD       tmp_reg0.0<8,8,1>:UD {align1};
+mov  (8) vme_msg_0.0<1>:UD      tmp_reg0.0<8,8,1>:UD {align1};
         
 /* m1 */
 mov  (1) tmp_reg1.4<1>:UD       MAX_NUM_MV:UD {align1};                                   /* Default value MAX 32 MVs */
 mov  (1) tmp_reg1.8<1>:UD              SEARCH_PATH_LEN:UD {align1};
 
-mov  (8) msg_reg1<1>:UD         tmp_reg1.0<8,8,1>:UD {align1};
+mov  (8) vme_msg_1<1>:UD        tmp_reg1.0<8,8,1>:UD {align1};
         
 /* m2 */        
-mov  (8) msg_reg2<1>:UD         0x0:UD {align1};
+mov  (8) vme_msg_2<1>:UD        0x0:UD {align1};
 
 /* m3 */        
-mov  (8) msg_reg3<1>:UD         0x0:UD {align1};
+mov  (8) vme_msg_3<1>:UD        0x0:UD {align1};
 
-send (8) 0 vme_wb null vme(BIND_IDX_VME,0,0,VME_MESSAGE_TYPE_INTER) mlen 4 rlen 4 {align1};
+/* m4 */        
+mov  (8) vme_msg_4<1>:UD        0x0:UD {align1};
 
+send (8)
+        vme_msg_ind
+        vme_wb
+        null
+        vme(
+                BIND_IDX_VME,
+                0,
+                0,
+                VME_MESSAGE_TYPE_INTER
+        )
+        mlen vme_msg_length
+        rlen 4
+        {align1};
+        
 /*
  * Oword Block Write message
  */
@@ -68,10 +81,24 @@ mov  (8) msg_reg1.0<1>:UD       tmp_reg3.0<8,8,0>:UD   {align1};
 mov  (8) msg_reg2.0<1>:UD       tmp_reg3.0<8,8,0>:UD   {align1};
 
 /* bind index 3, write 4 oword, msg type: 8(OWord Block Write) */
-send (16) 0 obw_wb null write(BIND_IDX_OUTPUT, 3, 8, 1) mlen 3 rlen 1 {align1};
+send (16)
+        msg_ind
+        obw_wb
+        null
+        data_port(
+                OBW_CACHE_TYPE,
+                OBW_MESSAGE_TYPE,
+                OBW_CONTROL_3,
+                OBW_BIND_IDX,
+                OBW_WRITE_COMMIT_CATEGORY,
+                OBW_HEADER_PRESENT
+        )
+        mlen 3
+        rlen obw_wb_length
+        {align1};
         
 /*
  * kill thread
  */        
 mov  (8) msg_reg0<1>:UD         r0<8,8,1>:UD {align1};
-send (16) 0 acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
+send (16) msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
diff --git a/i965_drv_video/shaders/vme/inter_frame.g6a b/i965_drv_video/shaders/vme/inter_frame.g6a
new file mode 100644 (file)
index 0000000..d89588f
--- /dev/null
@@ -0,0 +1,2 @@
+include(`gen6_vme_header.inc')
+include(`inter_frame.asm')
index 915c8f6..05ee798 100644 (file)
@@ -11,6 +11,7 @@
    { 0x00000001, 0x24280061, 0x00000000, 0x00003f3f },
    { 0x00600001, 0x20200022, 0x008d0420, 0x00000000 },
    { 0x00600001, 0x20400062, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x20400062, 0x00000000, 0x00000000 },
    { 0x00600001, 0x20600062, 0x00000000, 0x00000000 },
    { 0x08600031, 0x21801cdd, 0x00000000, 0x08482000 },
    { 0x00000041, 0x24684521, 0x000000a2, 0x000000a1 },
diff --git a/i965_drv_video/shaders/vme/inter_frame.g7a b/i965_drv_video/shaders/vme/inter_frame.g7a
new file mode 100644 (file)
index 0000000..cb51f52
--- /dev/null
@@ -0,0 +1,2 @@
+include(`gen7_vme_header.inc')
+include(`inter_frame.asm')
diff --git a/i965_drv_video/shaders/vme/inter_frame.g7b b/i965_drv_video/shaders/vme/inter_frame.g7b
new file mode 100644 (file)
index 0000000..660721d
--- /dev/null
@@ -0,0 +1,28 @@
+   { 0x00800001, 0x24000061, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
+   { 0x00200041, 0x24082e29, 0x004500a0, 0x00100010 },
+   { 0x00200041, 0x24002e29, 0x004500a0, 0x00100010 },
+   { 0x00200040, 0x24003dad, 0x00450400, 0xfff8fff8 },
+   { 0x00000001, 0x240c0061, 0x00000000, 0x7e203000 },
+   { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
+   { 0x00000001, 0x24160169, 0x00000000, 0x20202020 },
+   { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 },
+   { 0x00000001, 0x24240061, 0x00000000, 0x00000020 },
+   { 0x00000001, 0x24280061, 0x00000000, 0x00003f3f },
+   { 0x00600001, 0x28200021, 0x008d0420, 0x00000000 },
+   { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+   { 0x08600031, 0x21801cbd, 0x00000800, 0x0a482001 },
+   { 0x00000041, 0x24684521, 0x000000a2, 0x000000a1 },
+   { 0x00000040, 0x24684421, 0x00000468, 0x000000a0 },
+   { 0x00000041, 0x24680c21, 0x00000468, 0x00000004 },
+   { 0x00000001, 0x24740231, 0x00000014, 0x00000000 },
+   { 0x00600001, 0x28000021, 0x008d0460, 0x00000000 },
+   { 0x00200001, 0x24600229, 0x004501a0, 0x00000000 },
+   { 0x00200040, 0x24603dad, 0x00b10460, 0xffe0ffe0 },
+   { 0x00600001, 0x28200021, 0x008c0460, 0x00000000 },
+   { 0x00600001, 0x28400021, 0x008c0460, 0x00000000 },
+   { 0x0a800031, 0x20001cac, 0x00000800, 0x060a0300 },
+   { 0x00600001, 0x28000021, 0x008d0000, 0x00000000 },
+   { 0x07800031, 0x24001ca8, 0x00000800, 0x82000010 },
index 65cd58b..2d2e688 100644 (file)
@@ -15,8 +15,6 @@
 //  Now, begin source code....
 //
 
-include(`vme_header.inc')
-
 /*
  * __START
  */
@@ -34,7 +32,7 @@ add  (1) tmp_reg0.4<1>:D        tmp_reg0.4<0,1,0>:D -1:W {align1};     /* Y offs
 mov  (1) tmp_reg0.8<1>:UD       BLOCK_32X1 {align1};
 mov  (1) tmp_reg0.20<1>:UB      thread_id_ub {align1};                  /* dispatch id */
 mov  (8) msg_reg0.0<1>:UD       tmp_reg0.0<8,8,1>:UD {align1};        
-send (16) 0 INEP_ROW null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
+send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
 
 /* COL */
 mul  (2) tmp_reg0.0<1>:D        orig_xy_ub<2,2,1>:UB 16:UW {align1};    /* (x, y) * 16 */
@@ -42,7 +40,7 @@ add  (1) tmp_reg0.0<1>:D        tmp_reg0.0<0,1,0>:D -4:W {align1};     /* X offs
 mov  (1) tmp_reg0.8<1>:UD       BLOCK_4X16 {align1};
 mov  (1) tmp_reg0.20<1>:UB      thread_id_ub {align1};                  /* dispatch id */
 mov  (8) msg_reg0.0<1>:UD       tmp_reg0.0<8,8,1>:UD {align1};                
-send (16) 0 INEP_COL0 null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
+send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
         
 /*
  * VME message
@@ -50,7 +48,7 @@ send (16) 0 INEP_COL0 null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
 /* m0 */        
 mul  (2) tmp_reg0.8<1>:UW       orig_xy_ub<2,2,1>:UB 16:UW {align1};    /* (x, y) * 16 */
 mov  (1) tmp_reg0.20<1>:UB      thread_id_ub {align1};                  /* dispatch id */
-mov  (8) msg_reg0.0<1>:UD       tmp_reg0.0<8,8,1>:UD {align1};
+mov  (8) vme_msg_0.0<1>:UD      tmp_reg0.0<8,8,1>:UD {align1};
 
 /* m1 */
 mov  (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE + LUMA_INTRA_4x4_DISABLE {align1};
@@ -69,16 +67,32 @@ add  (1) tmp_x_w<1>:W w_in_mb_uw<0,1,0>:UW -tmp_x_w<0,1,0>:W {align1};
 mul.nz.f0.0 (1) null<1>:UD tmp_x_w<0,1,0>:W orig_y_ub<0,1,0>:UB {align1};                                       /* (width - (X + 1)) * Y != 0 */
 (f0.0) add (1) mb_intra_struct_ub<1>:UB mb_intra_struct_ub<0,1,0>:UB INTRA_PRED_AVAIL_FLAG_C {align1};          /* C */
 
-mov  (8) msg_reg1<1>:UD         tmp_reg1.0<8,8,1>:UD {align1};
+mov  (8) vme_msg_1<1>:UD        tmp_reg1.0<8,8,1>:UD {align1};
 
 /* m2 */        
-mov  (8) msg_reg2<1>:UD         INEP_ROW.0<8,8,1>:UD {align1};
-
+mov (8) vme_msg_2<1>:UD         0x0:UD {align1};
+        
 /* m3 */        
-mov  (8) msg_reg3<1>:UD         0x0 {align1};
-mov (16) msg_reg3.0<1>:UB       INEP_COL0.3<32,8,4>:UB {align1};
-mov  (1) msg_reg3.16<1>:UD      INTRA_PREDICTORE_MODE {align1};
-send (8) 0 vme_wb null vme(BIND_IDX_VME,0,0,VME_MESSAGE_TYPE_INTRA) mlen 4 rlen 1 {align1};
+mov  (8) vme_msg_3<1>:UD         INEP_ROW.0<8,8,1>:UD {align1};
+
+/* m4 */        
+mov  (8) vme_msg_4<1>:UD         0x0 {align1};
+mov (16) vme_msg_4.0<1>:UB       INEP_COL0.3<32,8,4>:UB {align1};
+mov  (1) vme_msg_4.16<1>:UD      INTRA_PREDICTORE_MODE {align1};
+        
+send (8)
+        vme_msg_ind
+        vme_wb
+        null
+        vme(
+                BIND_IDX_VME,
+                0,
+                0,
+                VME_MESSAGE_TYPE_INTRA
+        )
+        mlen vme_msg_length
+        rlen 1
+        {align1};
 
 /*
  * Oword Block Write message
@@ -93,10 +107,24 @@ mov  (1) msg_reg1.4<1>:UD       vme_wb.16<0,1,0>:UD     {align1};
 mov  (1) msg_reg1.8<1>:UD       vme_wb.20<0,1,0>:UD     {align1};
 mov  (1) msg_reg1.12<1>:UD      vme_wb.24<0,1,0>:UD     {align1};
 /* bind index 3, write 1 oword, msg type: 8(OWord Block Write) */
-send (16) 0 obw_wb null write(BIND_IDX_OUTPUT, 0, 8, 1) mlen 2 rlen 1 {align1};
+send (16)
+        msg_ind
+        obw_wb
+        null
+        data_port(
+                OBW_CACHE_TYPE,
+                OBW_MESSAGE_TYPE,
+                OBW_CONTROL_0,
+                OBW_BIND_IDX,
+                OBW_WRITE_COMMIT_CATEGORY,
+                OBW_HEADER_PRESENT
+        )
+        mlen 2
+        rlen obw_wb_length
+        {align1};
 
 /*
  * kill thread
  */        
 mov  (8) msg_reg0<1>:UD         r0<8,8,1>:UD {align1};
-send (16) 0 acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
+send (16) msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
diff --git a/i965_drv_video/shaders/vme/intra_frame.g6a b/i965_drv_video/shaders/vme/intra_frame.g6a
new file mode 100644 (file)
index 0000000..d39118c
--- /dev/null
@@ -0,0 +1,3 @@
+include(`gen6_vme_header.inc')
+include(`intra_frame.asm')
+
index c5fc865..90ee252 100644 (file)
@@ -6,13 +6,13 @@
    { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f },
    { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
    { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 },
-   { 0x04800031, 0x22401cdd, 0x00000000, 0x02188004 },
+   { 0x04600031, 0x22401cd1, 0x00000000, 0x02188004 },
    { 0x00200041, 0x24002e25, 0x004500a0, 0x00100010 },
    { 0x00000040, 0x24003ca5, 0x00000400, 0xfffcfffc },
    { 0x00000001, 0x240800e1, 0x00000000, 0x000f0003 },
    { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
    { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 },
-   { 0x04800031, 0x22801cdd, 0x00000000, 0x02288004 },
+   { 0x04600031, 0x22801cd1, 0x00000000, 0x02288004 },
    { 0x00200041, 0x24082e29, 0x004500a0, 0x00100010 },
    { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
    { 0x00600001, 0x20000022, 0x008d0400, 0x00000000 },
@@ -28,6 +28,7 @@
    { 0x02000041, 0x200045a0, 0x00000440, 0x000000a1 },
    { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000008 },
    { 0x00600001, 0x20200022, 0x008d0420, 0x00000000 },
+   { 0x00600001, 0x20400062, 0x00000000, 0x00000000 },
    { 0x00600001, 0x20400022, 0x008d0240, 0x00000000 },
    { 0x00600001, 0x206000e2, 0x00000000, 0x00000000 },
    { 0x00800001, 0x20600232, 0x00cf0283, 0x00000000 },
diff --git a/i965_drv_video/shaders/vme/intra_frame.g7a b/i965_drv_video/shaders/vme/intra_frame.g7a
new file mode 100644 (file)
index 0000000..c43e739
--- /dev/null
@@ -0,0 +1,2 @@
+include(`gen7_vme_header.inc')
+include(`intra_frame.asm')
diff --git a/i965_drv_video/shaders/vme/intra_frame.g7b b/i965_drv_video/shaders/vme/intra_frame.g7b
new file mode 100644 (file)
index 0000000..cc063d8
--- /dev/null
@@ -0,0 +1,47 @@
+   { 0x00800001, 0x24000061, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
+   { 0x00200041, 0x24002e25, 0x004500a0, 0x00100010 },
+   { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
+   { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
+   { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f },
+   { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
+   { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 },
+   { 0x04600031, 0x22401cb1, 0x00000800, 0x02190004 },
+   { 0x00200041, 0x24002e25, 0x004500a0, 0x00100010 },
+   { 0x00000040, 0x24003ca5, 0x00000400, 0xfffcfffc },
+   { 0x00000001, 0x240800e1, 0x00000000, 0x000f0003 },
+   { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
+   { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 },
+   { 0x04600031, 0x22801cb1, 0x00000800, 0x02290004 },
+   { 0x00200041, 0x24082e29, 0x004500a0, 0x00100010 },
+   { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
+   { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 },
+   { 0x00000001, 0x243c00f1, 0x00000000, 0x00000006 },
+   { 0x02000010, 0x20002e28, 0x000000a0, 0x00000000 },
+   { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000060 },
+   { 0x02000010, 0x20002e28, 0x000000a1, 0x00000000 },
+   { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000010 },
+   { 0x02000041, 0x20004628, 0x000000a0, 0x000000a1 },
+   { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000004 },
+   { 0x00000040, 0x24402e2d, 0x000000a0, 0x00010001 },
+   { 0x00000040, 0x2440352d, 0x000000a2, 0x00004440 },
+   { 0x02000041, 0x200045a0, 0x00000440, 0x000000a1 },
+   { 0x00010040, 0x243d1e31, 0x0000043d, 0x00000008 },
+   { 0x00600001, 0x28200021, 0x008d0420, 0x00000000 },
+   { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28600021, 0x008d0240, 0x00000000 },
+   { 0x00600001, 0x288000e1, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x28800231, 0x00cf0283, 0x00000000 },
+   { 0x00000001, 0x28900061, 0x00000000, 0x11111111 },
+   { 0x08600031, 0x21801cbd, 0x00000800, 0x0a184001 },
+   { 0x00000041, 0x24684521, 0x000000a2, 0x000000a1 },
+   { 0x00000040, 0x24684421, 0x00000468, 0x000000a0 },
+   { 0x00000001, 0x24740231, 0x00000014, 0x00000000 },
+   { 0x00600001, 0x28000021, 0x008d0460, 0x00000000 },
+   { 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
+   { 0x00000001, 0x28240021, 0x00000190, 0x00000000 },
+   { 0x00000001, 0x28280021, 0x00000194, 0x00000000 },
+   { 0x00000001, 0x282c0021, 0x00000198, 0x00000000 },
+   { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0000 },
+   { 0x00600001, 0x28000021, 0x008d0000, 0x00000000 },
+   { 0x07800031, 0x24001ca8, 0x00000800, 0x82000010 },