Tizen 2.0 Release
[profile/ivi/osmesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_dataflow_deadcode.c
1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  *
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial
16  * portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  */
27
28 #include "radeon_dataflow.h"
29
30 #include "radeon_compiler.h"
31
32
33 struct updatemask_state {
34         unsigned char Output[RC_REGISTER_MAX_INDEX];
35         unsigned char Temporary[RC_REGISTER_MAX_INDEX];
36         unsigned char Address;
37         unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
38 };
39
/**
 * Per-instruction result of the backward pass, indexed by instruction IP.
 */
struct instruction_state {
	/* Destination channels that were found to be used. */
	unsigned char WriteMask : 4;
	/* Set once the instruction's ALU result write was found to be used. */
	unsigned char WriteALUResult : 1;
	/* Per source operand: channels already accounted for as read. */
	unsigned char SrcReg[3];
};
45
/**
 * Bookkeeping for one loop on the loop stack.
 *
 * push_break() appends a copy of the current register state to Breaks
 * each time a BRK is encountered.
 */
struct loopinfo {
	/* Register-state snapshots, one per recorded BRK. */
	struct updatemask_state *Breaks;
	/* Number of snapshots currently stored in Breaks. */
	unsigned int BreakCount;
	/* Capacity counter handed to memory_pool_array_reserve() for Breaks. */
	unsigned int BreaksReserved;
};
51
52 struct branchinfo {
53         unsigned int HaveElse:1;
54
55         struct updatemask_state StoreEndif;
56         struct updatemask_state StoreElse;
57 };
58
59 struct deadcode_state {
60         struct radeon_compiler * C;
61         struct instruction_state * Instructions;
62
63         struct updatemask_state R;
64
65         struct branchinfo * BranchStack;
66         unsigned int BranchStackSize;
67         unsigned int BranchStackReserved;
68
69         struct loopinfo * LoopStack;
70         unsigned int LoopStackSize;
71         unsigned int LoopStackReserved;
72 };
73
74
75 static void or_updatemasks(
76         struct updatemask_state * dst,
77         struct updatemask_state * a,
78         struct updatemask_state * b)
79 {
80         for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
81                 dst->Output[i] = a->Output[i] | b->Output[i];
82                 dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
83         }
84
85         for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
86                 dst->Special[i] = a->Special[i] | b->Special[i];
87
88         dst->Address = a->Address | b->Address;
89 }
90
91 static void push_break(struct deadcode_state *s)
92 {
93         struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1];
94         memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
95                 loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);
96
97         memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
98 }
99
100 static void push_loop(struct deadcode_state * s)
101 {
102         memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
103                         s->LoopStackSize, s->LoopStackReserved, 1);
104         memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
105 }
106
107 static void push_branch(struct deadcode_state * s)
108 {
109         struct branchinfo * branch;
110
111         memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
112                         s->BranchStackSize, s->BranchStackReserved, 1);
113
114         branch = &s->BranchStack[s->BranchStackSize++];
115         branch->HaveElse = 0;
116         memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
117 }
118
119 static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
120 {
121         if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
122                 if (index >= RC_REGISTER_MAX_INDEX) {
123                         rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
124                         return 0;
125                 }
126
127                 if (file == RC_FILE_OUTPUT)
128                         return &s->R.Output[index];
129                 else
130                         return &s->R.Temporary[index];
131         } else if (file == RC_FILE_ADDRESS) {
132                 return &s->R.Address;
133         } else if (file == RC_FILE_SPECIAL) {
134                 if (index >= RC_NUM_SPECIAL_REGISTERS) {
135                         rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
136                         return 0;
137                 }
138
139                 return &s->R.Special[index];
140         }
141
142         return 0;
143 }
144
145 static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
146 {
147         unsigned char * pused = get_used_ptr(s, file, index);
148         if (pused)
149                 *pused |= mask;
150 }
151
152 static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
153 {
154         const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
155         struct instruction_state * insts = &s->Instructions[inst->IP];
156         unsigned int usedmask = 0;
157         unsigned int srcmasks[3];
158
159         if (opcode->HasDstReg) {
160                 unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
161                 if (pused) {
162                         usedmask = *pused & inst->U.I.DstReg.WriteMask;
163                         *pused &= ~usedmask;
164                 }
165         }
166
167         insts->WriteMask |= usedmask;
168
169         if (inst->U.I.WriteALUResult) {
170                 unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
171                 if (pused && *pused) {
172                         if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
173                                 usedmask |= RC_MASK_X;
174                         else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
175                                 usedmask |= RC_MASK_W;
176
177                         *pused = 0;
178                         insts->WriteALUResult = 1;
179                 }
180         }
181
182         rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
183
184         for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
185                 unsigned int refmask = 0;
186                 unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
187                 insts->SrcReg[src] |= newsrcmask;
188
189                 for(unsigned int chan = 0; chan < 4; ++chan) {
190                         if (GET_BIT(newsrcmask, chan))
191                                 refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
192                 }
193
194                 /* get rid of spurious bits from ZERO, ONE, etc. swizzles */
195                 refmask &= RC_MASK_XYZW;
196
197                 if (!refmask)
198                         continue;
199
200                 mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
201
202                 if (inst->U.I.SrcReg[src].RelAddr)
203                         mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
204         }
205 }
206
207 static void mark_output_use(void * data, unsigned int index, unsigned int mask)
208 {
209         struct deadcode_state * s = data;
210
211         mark_used(s, RC_FILE_OUTPUT, index, mask);
212 }
213
214 void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
215 {
216         struct deadcode_state s;
217         unsigned int nr_instructions;
218         rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;
219         unsigned int ip;
220
221         memset(&s, 0, sizeof(s));
222         s.C = c;
223
224         nr_instructions = rc_recompute_ips(c);
225         s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
226         memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
227
228         dce(c, &s, &mark_output_use);
229
230         for(struct rc_instruction * inst = c->Program.Instructions.Prev;
231             inst != &c->Program.Instructions;
232             inst = inst->Prev) {
233                 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
234
235                 switch(opcode->Opcode){
236                 /* Mark all sources in the loop body as used before doing
237                  * normal deadcode analysis.  This is probably not optimal.
238                  */
239                 case RC_OPCODE_ENDLOOP:
240                 {
241                         int endloops = 1;
242                         struct rc_instruction *ptr;
243                         for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
244                                 opcode = rc_get_opcode_info(ptr->U.I.Opcode);
245                                 if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
246                                         endloops--;
247                                         continue;
248                                 }
249                                 if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
250                                         endloops++;
251                                         continue;
252                                 }
253                                 if(opcode->HasDstReg){
254                                         int src = 0;
255                                         unsigned int srcmasks[3];
256                                         rc_compute_sources_for_writemask(ptr,
257                                                 ptr->U.I.DstReg.WriteMask, srcmasks);
258                                         for(src=0; src < opcode->NumSrcRegs; src++){
259                                                 mark_used(&s,
260                                                         ptr->U.I.SrcReg[src].File,
261                                                         ptr->U.I.SrcReg[src].Index,
262                                                         srcmasks[src]);
263                                         }
264                                 }
265                         }
266                         push_loop(&s);
267                         break;
268                 }
269                 case RC_OPCODE_BRK:
270                         push_break(&s);
271                         break;
272                 case RC_OPCODE_BGNLOOP:
273                 {
274                         unsigned int i;
275                         struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
276                         for(i = 0; i < loop->BreakCount; i++) {
277                                 or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
278                         }
279                         break;
280                 }
281                 case RC_OPCODE_CONT:
282                         break;
283                 case RC_OPCODE_ENDIF:
284                         push_branch(&s);
285                         break;
286                 default:
287                         if (opcode->IsFlowControl && s.BranchStackSize) {
288                                 struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
289                                 if (opcode->Opcode == RC_OPCODE_IF) {
290                                         or_updatemasks(&s.R,
291                                                         &s.R,
292                                                         branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
293
294                                         s.BranchStackSize--;
295                                 } else if (opcode->Opcode == RC_OPCODE_ELSE) {
296                                         if (branch->HaveElse) {
297                                                 rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
298                                         } else {
299                                                 memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
300                                                 memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
301                                                 branch->HaveElse = 1;
302                                         }
303                                 } else {
304                                         rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
305                                 }
306                         }
307                 }
308
309                 update_instruction(&s, inst);
310         }
311
312         ip = 0;
313         for(struct rc_instruction * inst = c->Program.Instructions.Next;
314             inst != &c->Program.Instructions;
315             inst = inst->Next, ++ip) {
316                 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
317                 int dead = 1;
318                 unsigned int srcmasks[3];
319                 unsigned int usemask;
320
321                 if (!opcode->HasDstReg) {
322                         dead = 0;
323                 } else {
324                         inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
325                         if (s.Instructions[ip].WriteMask)
326                                 dead = 0;
327
328                         if (s.Instructions[ip].WriteALUResult)
329                                 dead = 0;
330                         else
331                                 inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
332                 }
333
334                 if (dead) {
335                         struct rc_instruction * todelete = inst;
336                         inst = inst->Prev;
337                         rc_remove_instruction(todelete);
338                         continue;
339                 }
340
341                 usemask = s.Instructions[ip].WriteMask;
342
343                 if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
344                         usemask |= RC_MASK_X;
345                 else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
346                         usemask |= RC_MASK_W;
347
348                 rc_compute_sources_for_writemask(inst, usemask, srcmasks);
349
350                 for(unsigned int src = 0; src < 3; ++src) {
351                         for(unsigned int chan = 0; chan < 4; ++chan) {
352                                 if (!GET_BIT(srcmasks[src], chan))
353                                         SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
354                         }
355                 }
356         }
357
358         rc_calculate_inputs_outputs(c);
359 }