VME: Handle multiple macroblocks in a single thread
[profile/ivi/vaapi-intel-driver.git] / src / shaders / vme / batchbuffer.asm
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Xiang Haihao <haihao.xiang@intel.com>
25  */
26         
27 /*
28  * __START
 *
 * One-time setup, as read from the instructions below:
 *   - clear the scratch registers tmp_reg0 / tmp_reg2;
 *   - record thread_id_ub in byte 20 of the block-write header;
 *   - build an 8-dword command template in media_object_ud;
 *   - split total_mbs into a "full commands" part and a remainder.
 *
 * Flags produced here and consumed later:
 *   f0.0 - set when the masked total_mbs is zero (no full command to emit);
 *   f0.1 - set when remainder_cmds is zero; tested at __REMAINDER.
 *
 * NOTE(review): the two AND masks give "multiple of N" and "modulo N" only
 * if NUM_MACROBLOCKS_PER_COMMAND is a power of two. Its definition is not
 * in this file - confirm.
29  */
30 __START:
           /* Zero 16 dwords starting at each scratch register. */
31         mov             (16)    tmp_reg0<1>:ud                  0x0:ud {align1} ;
32         mov             (16)    tmp_reg2<1>:ud                  0x0:ud {align1} ;
33         mov             (1)     obw_header.20<1>:ub             thread_id_ub {align1};                  /* dispatch id */
34
           /* Command template: cleared first, then filled field by field. */
35         mov             (8)     media_object_ud<1>:ud           0x0:ud {align1} ;
36         mov             (1)     media_object0_ud<1>:ud          CMD_MEDIA_OBJECT {align1} ;
37         mov             (1)     media_object1_ud<1>:ud          mtype_ub<0,1,0>ub {align1};
38         mov             (1)     media_object6_width<1>:uw       width_in_mb<0,1,0>:uw {align1};
39         mov             (1)     media_object7_flag<1>:uw        transform_8x8_ub<0,1,0>ub {align1};
           /* Default macroblock count per command; __REMAINDER overrides it for the tail. */
40         mov             (1)     media_object7_num_mbs<1>:uw     NUM_MACROBLOCKS_PER_COMMAND:uw {align1} ;
41
           /* Widen the row width to :ud; it is the divisor for the intdivmod in __CMD_LOOP. */
42         mov             (1)     width_per_row<1>:ud             width_in_mb<0,1,0>:uw {align1} ;
           /* remainder_cmds = total_mbs & (N - 1); .z sets f0.1 when that result is zero. */
43         and.z.f0.1      (1)     remainder_cmds<1>:ud            total_mbs<0,1,0>:ud     (NUM_MACROBLOCKS_PER_COMMAND - 1):ud {align1} ;
           /* total_mbs = total_mbs & -N; .z sets f0.0 when that result is zero. */
44         and.z.f0.0      (1)     total_mbs<1>:ud                 total_mbs<0,1,0>:ud     -NUM_MACROBLOCKS_PER_COMMAND:ud {align1} ;
45
           /* No full command to emit: skip the loop and go straight to the tail handling. */
46         (f0.0)jmpi      (1)     __REMAINDER ;
47         
48 __CMD_LOOP:
           /*
            * One iteration per full command: write the current template with
            * the block-write message, then advance the macroblock position,
            * the write offset and the remaining count for the next pass.
            */
           /* Message payload: header copy in msg_reg0, command template in msg_reg1. */
49         mov             (8)     msg_reg0.0<1>:ud                obw_header<8,8,1>:ud {align1};
50         mov             (8)     msg_reg1<1>:ud                  media_object_ud<8,8,1>:ud {align1};
51         
52 /* bind index 5, write 2 oword, msg type: 8(OWord Block Write) */
53 send (16)
54         msg_ind
55         obw_wb
56         null
57         data_port(
58                 OBW_CACHE_TYPE,
59                 OBW_MESSAGE_TYPE,
60                 OBW_CONTROL_2,
61                 OBW_BIND_IDX,
62                 OBW_WRITE_COMMIT_CATEGORY,
63                 OBW_HEADER_PRESENT
64         )
65         mlen 2
66         rlen obw_wb_length
67         {align1};
68
69         /* (x, y) of the first macroblock */
           /*
            * count advances by one command's worth of macroblocks and is divided
            * by the row width; the quotient is shifted left by 8 and added to
            * `remainder`, and the low word of the sum goes into the template.
            * NOTE(review): `count` and `remainder` are defined outside this file;
            * presumably count lives in a scratch register zeroed in __START and
            * intdivmod leaves the remainder in `remainder` - confirm.
            * The update happens after the send, so it applies to the NEXT command
            * written (the next iteration, or the tail command in __REMAINDER).
            */
70         add             (1)     count<1>:ud                     count<0,1,0>:ud         NUM_MACROBLOCKS_PER_COMMAND:uw {align1} ;
71         math            (1)     quotient<1>:ud                  count<0,1,0>:ud         width_per_row<0,1,0>:ud intdivmod {align1} ;
72         shl             (1)     quotient<1>:ud                  quotient<0,1,0>:ud      8:uw {align1} ;
73         add             (1)     quotient<1>:ud                  quotient<0,1,0>:ud      remainder<0,1,0>:ud {align1} ;
74         mov             (1)     media_object6_xy<1>:uw          quotient<0,1,0>:uw {align1} ;
75         
76         /* the new offset */
           /* Advance by 2, matching the 2 owords written above (offset presumably in oword units - confirm). */
77         add             (1)     obw_header.8<1>:ud              obw_header.8<0,1,0>:ud  2:uw {align1} ;
78
           /*
            * Subtract one command's worth from the remaining count; .z sets f0.0
            * when the result reaches zero, and the inverted predicate loops
            * until then.
            * NOTE(review): this uses a :w (16-bit) view of total_mbs, whereas
            * __START masks it as :ud, so only the low word takes part in the
            * zero test - confirm the total always fits in 16 bits.
            */
79         add.z.f0.0      (1)     total_mbs<1>:w                  total_mbs<0,1,0>:w      -NUM_MACROBLOCKS_PER_COMMAND:w {align1} ;
80         (-f0.0)jmpi     (1)     __CMD_LOOP ;
81
82 __REMAINDER:
           /*
            * Tail handling. f0.1 was set in __START when remainder_cmds is zero;
            * in that case there is nothing left to write. Nothing between
            * __START and here writes f0.1.
            */
83         (f0.1)jmpi      (1)     __DONE ;
84
           /* Same template as the loop, but with the macroblock count replaced by the remainder. */
85         mov             (1)     media_object7_num_mbs<1>:uw     remainder_cmds<0,1,0>:uw {align1} ;        
86         mov             (8)     msg_reg0.0<1>:ud                obw_header<8,8,1>:ud {align1};
87         mov             (8)     msg_reg1<1>:ud                  media_object_ud<8,8,1>:ud {align1};
88         
           /* Same block-write descriptor as in __CMD_LOOP (OBW_CONTROL_2, mlen 2). */
89 send (16)
90         msg_ind
91         obw_wb
92         null
93         data_port(
94                 OBW_CACHE_TYPE,
95                 OBW_MESSAGE_TYPE,
96                 OBW_CONTROL_2,
97                 OBW_BIND_IDX,
98                 OBW_WRITE_COMMIT_CATEGORY,
99                 OBW_HEADER_PRESENT
100         )
101         mlen 2
102         rlen obw_wb_length
103         {align1};
104
105         /* the new offset */
            /* Advance past the tail command so the write in __DONE lands after it. */
106         add             (1)     obw_header.8<1>:ud              obw_header.8<0,1,0>:ud  2:uw {align1} ;
107         
108 __DONE:
109
110 /* bind index 5, write 1 oword, msg type: 8(OWord Block Write) */
            /*
             * Terminate the generated command stream: the payload is four zeroed
             * dwords with MI_BATCH_BUFFER_END stored at msg_reg1.4, written at the
             * current header offset with OBW_CONTROL_0 instead of OBW_CONTROL_2.
             * NOTE(review): subregister numbers look like byte offsets here (cf.
             * obw_header.20:ub in __START), which would put the end marker in the
             * second dword of the oword - confirm against the assembler mode used.
             */
111         mov             (8)     msg_reg0.0<1>:ud                obw_header<8,8,1>:ud {align1} ;
112         mov             (4)     msg_reg1.0<1>:ud                0x0:ud {align1} ;
113         mov             (1)     msg_reg1.4<1>:ud                MI_BATCH_BUFFER_END {align1} ;
114         
115 send (16)
116         msg_ind
117         obw_wb
118         null
119         data_port(
120                 OBW_CACHE_TYPE,
121                 OBW_MESSAGE_TYPE,
122                 OBW_CONTROL_0,
123                 OBW_BIND_IDX,
124                 OBW_WRITE_COMMIT_CATEGORY,
125                 OBW_HEADER_PRESENT
126         )
127         mlen 2
128         rlen obw_wb_length
129         {align1};
130
131 __EXIT:
            /*
             * End of kernel: copy r0 into the message register and send a
             * one-register message with no response (mlen 1, rlen 0) to the
             * thread spawner. The EOT option on the send marks end of thread.
             */
132         mov             (8)     msg_reg0<1>:ud                  r0<8,8,1>:ud {align1} ;
133         send            (16)    msg_ind acc0<1>ud null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT} ;