VME: Handle multiple macroblocks in a single thread
[profile/ivi/vaapi-intel-driver.git] / src / shaders / vme / inter_frame.asm
1 /*
2  * Copyright © <2010>, Intel Corporation.
3  *
4  * This program is licensed under the terms and conditions of the
5  * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
6  * http://www.opensource.org/licenses/eclipse-1.0.php.
7  *
8  */
9 // Module name: inter_frame.asm
10 //
11 // Make inter prediction (motion) estimation for Inter frame
12 //
13
14 //
15 //  Now, begin source code....
16 //
17
18 /*
19  * __INTER_START: one-time setup of the VME headers (vme_m0, vme_m1) and the output-write header (obw_m0), done before the per-macroblock loop
20  */
21 __INTER_START:
22 mov  (16) tmp_reg0.0<1>:UD      0x0:UD {align1};
23 mov  (16) tmp_reg2.0<1>:UD      0x0:UD {align1};
24 mov  (16) tmp_reg3.0<1>:UD      0x0:UD {align1};
25 /* NOTE(review): vme_m0 / obw_m0 / obw_m1 presumably alias the temporaries cleared above - confirm against the register definitions */
26 shl  (2) vme_m0.8<1>:UW         orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* Source =  (x, y) * 16 */
27         
28 #ifdef DEV_SNB        
29 shl  (2) vme_m0.0<1>:UW         orig_xy_ub<2,2,1>:UB 4:UW {align1};     /* DEV_SNB: start from the source position (x, y) * 16 ... */
30 add  (1) vme_m0.0<1>:W          vme_m0.0<2,2,1>:W -16:W {align1};               /* Reference = (x-16,y-12)-(x+32,y+24) */
31 add  (1) vme_m0.2<1>:W          vme_m0.2<2,2,1>:W -12:W {align1};               /* ... then move it by (-16, -12) */
32 #else
33 mov  (1) vme_m0.0<1>:W          -16:W {align1} ;                /* Reference = (x-16,y-12)-(x+32,y+24) */
34 mov  (1) vme_m0.2<1>:W          -12:W {align1} ;                /* non-SNB: only the (-16, -12) offset is written, not a source-based position */
35 #endif
36         
37 mov  (1) vme_m0.12<1>:UD        INTER_PART_MASK + INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER:UD {align1};    /* 16x16 Source, 1/4 pixel, Haar */
38 mov  (1) vme_m0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
39 mov  (1) vme_m0.22<1>:UW        REF_REGION_SIZE {align1};               /* Reference Width&Height, 32x32 */
40 /* NOTE(review): the reference window described above is larger than 32x32 - confirm against the REF_REGION_SIZE definition */
41 mov  (1) vme_m1.4<1>:UD         MAX_NUM_MV:UD {align1};                                   /* Default value MAX 32 MVs */
42 mov  (1) vme_m1.8<1>:UD         SEARCH_PATH_LEN:UD {align1};
43 /* Output offset for the first macroblock: (orig_y * w_in_mb + orig_x) << 2 */
44 mul  (1) obw_m0.8<1>:UD         w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};       /* orig_y * w_in_mb */
45 add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};         /* + orig_x */
46 shl  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD 0x2:UD {align1};                      /* * 4; the loop later advances this by 4 per macroblock */
47 mov  (1) obw_m0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
48         
49 /*
50  * VME message: top of the per-macroblock loop; builds the payload (vme_msg_0..vme_msg_4) and sends it
51  */
52 /* m0: copy of the vme_m0 header, which is updated at the bottom of the loop for each macroblock */
53 __VME_LOOP:     
54 mov  (8) vme_msg_0.0<1>:UD      vme_m0.0<8,8,1>:UD {align1};
55         
56 /* m1: copy of vme_m1 (MAX_NUM_MV and SEARCH_PATH_LEN set during setup) */
57 mov  (8) vme_msg_1<1>:UD        vme_m1.0<8,8,1>:UD {align1};
58         
59 /* m2: all zero */        
60 mov  (8) vme_msg_2<1>:UD        0x0:UD {align1};
61
62 /* m3: all zero */        
63 mov  (8) vme_msg_3<1>:UD        0x0:UD {align1};
64
65 /* m4: all zero */        
66 mov  (8) vme_msg_4<1>:UD        0x0:UD {align1};
67 /* Send vme_msg_length registers with message type VME_MESSAGE_TYPE_INTER; vme_inter_wb_length response registers are written to vme_wb */
68 send (8)
69         vme_msg_ind
70         vme_wb
71         null
72         vme(
73                 BIND_IDX_VME,
74                 0,
75                 0,
76                 VME_MESSAGE_TYPE_INTER
77         )
78         mlen vme_msg_length
79         rlen vme_inter_wb_length
80         {align1};
81 /*
82  * Oword Block Write message: header from obw_m0, followed by two registers filled with the value taken from vme_wb1.0
83  */
84 mov  (8) msg_reg0.0<1>:UD       obw_m0.0<8,8,1>:UD {align1};           /* header: carries the output offset (obw_m0.8) and dispatch id (obw_m0.20) */
85 /* Widen the first two bytes of vme_wb1 to two words in obw_m1 (presumably the motion vector x, y - TODO confirm) */
86 #ifdef DEV_SNB        
87 mov  (2) obw_m1.0<1>:UW         vme_wb1.0<2,2,1>:UB  {align1};         /* DEV_SNB: bytes read as unsigned */
88 add  (1) obw_m1.0<1>:W          obw_m1.0<2,2,1>:W -64:W {align1};      /* x -= 64; NOTE(review): presumably 16 pixels in quarter-pel units - confirm */
89 add  (1) obw_m1.2<1>:W          obw_m1.2<2,2,1>:W -48:W {align1};      /* y -= 48; NOTE(review): presumably 12 pixels in quarter-pel units - confirm */
90 #else
91 mov  (2) obw_m1.0<1>:UW         vme_wb1.0<2,2,1>:B  {align1};          /* non-SNB: bytes read as signed, no adjustment */
92 #endif       
93         
94 mov  (8) msg_reg1.0<1>:UD       obw_m1.0<8,8,0>:UD   {align1};         /* horizontal stride 0: dword 0 of obw_m1 is replicated into all 8 dwords */
95
96 mov  (8) msg_reg2.0<1>:UD       obw_m1.0<8,8,0>:UD   {align1};         /* second data register, same replicated value */
97
98 /* bind index 3, write 4 oword, msg type: 8(OWord Block Write); mlen 3 = header (msg_reg0) + two data registers (msg_reg1, msg_reg2) */
99 send (16)
100         msg_ind
101         obw_wb
102         null
103         data_port(
104                 OBW_CACHE_TYPE,
105                 OBW_MESSAGE_TYPE,
106                 OBW_CONTROL_3,
107                 OBW_BIND_IDX,
108                 OBW_WRITE_COMMIT_CATEGORY,
109                 OBW_HEADER_PRESENT
110         )
111         mlen 3
112         rlen obw_wb_length
113         {align1};
114 /* Advance vme_m0 / obw_m0 to the next macroblock in raster order, then loop until num_macroblocks reaches zero */
115 add             (1)     orig_x_ub<1>:ub         orig_x_ub<0,1,0>:ub             1:uw {align1} ;    /* next macroblock column */
116 add             (1)     vme_m0.8<1>:UW          vme_m0.8<0,1,0>:UW              16:UW {align1};    /* X += 16 */
117 #ifdef DEV_SNB        
118 add             (1)     vme_m0.0<1>:W           vme_m0.0<0,1,0>:W               16:W {align1};     /* X += 16 */
119 #endif
120 /* f0.0 is set when orig_x has reached w_in_mb, i.e. the end of the current macroblock row */
121 cmp.e.f0.0      (1)     null<1>:uw              w_in_mb_uw<0,1,0>:uw            orig_x_ub<0,1,0>:ub {align1};
122 /* (0, y + 1): the predicated instructions below wrap to the first column of the next row */        
123 (f0.0)mov       (1)     orig_x_ub<1>:ub         0:uw {align1} ;                                    /* column index back to 0 */
124 (f0.0)mov       (1)     vme_m0.8<1>:uw          0:uw {align1} ;                                    /* source X = 0 */
125 (f0.0)add       (1)     vme_m0.10<1>:uw         vme_m0.10<0,1,0>:uw             16:uw {align1} ;   /* source Y += 16 */
126 #ifdef DEV_SNB        
127 (f0.0)mov       (1)     vme_m0.0<1>:w           -16:W {align1};                 /* Reference = (x-16,y-12)-(x+32,y+24) */
128 (f0.0)add       (1)     vme_m0.2<1>:w           vme_m0.2<0,1,0>:w               16:w {align1};     /* reference Y += 16 */
129 #endif
130
131 add             (1)     obw_m0.8<1>:UD          obw_m0.8<0,1,0>:UD              4:UW {align1} ;    /* offset += 4 */
132         
133 add.z.f0.1      (1)     num_macroblocks<1>:w    num_macroblocks<0,1,0>:w        -1:w {align1} ;    /* num_macroblocks -= 1; f0.1 is set when the result is zero */
134 (-f0.1)jmpi     (1)     __VME_LOOP ;                                                               /* inverted predicate: jump back while the count is non-zero */
135         
136 __EXIT: 
137         
138 /*
139  * kill thread: copy r0 into the message register and send it to the thread spawner with EOT set
140  */        
141 mov  (8) msg_reg0<1>:UD         r0<8,8,1>:UD {align1};
142 send (16) msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};