From 12256fc2b2e0a54db24210a4b86f6fb5919d0fe8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 20 Mar 2009 17:08:30 -0600 Subject: [PATCH] mesa: linear scan register allocation for shader programs This is a check-point commit; not turned on yet. Use the linear scan register allocation algorithm to re-allocate temporary registers. This is done by computing the live intervals for registers and reallocating temps with that information. For some shaders this dramatically reduces the number of temp registers needed. For the time being we give up on a few cases such as relative-indexed temps and subroutine calls (but we inline most GLSL functions anyway). --- src/mesa/shader/prog_optimize.c | 428 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 407 insertions(+), 21 deletions(-) diff --git a/src/mesa/shader/prog_optimize.c b/src/mesa/shader/prog_optimize.c index ec06da1..458a69f 100644 --- a/src/mesa/shader/prog_optimize.c +++ b/src/mesa/shader/prog_optimize.c @@ -33,6 +33,9 @@ #include "prog_print.h" +#define MAX_LOOP_NESTING 50 + + static GLboolean dbg = GL_FALSE; @@ -76,6 +79,37 @@ remove_instructions(struct gl_program *prog, const GLboolean *removeFlags) /** + * Remap register indexes according to map. + * \param prog the program to search/replace + * \param file the type of register file to search/replace + * \param map maps old register indexes to new indexes + */ +static void +replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[]) +{ + GLuint i; + + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *inst = prog->Instructions + i; + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == file) { + GLuint index = inst->SrcReg[j].Index; + ASSERT(map[index] >= 0); + inst->SrcReg[j].Index = map[index]; + } + } + if (inst->DstReg.File == file) { + const GLuint index = inst->DstReg.Index; + ASSERT(map[index] >= 0); + inst->DstReg.Index = map[index]; + } + } +} + + +/** * Consolidate temporary registers to use low numbers. For example, if the * shader only uses temps 4, 5, 8, replace them with 0, 1, 2. */ @@ -83,7 +117,7 @@ static void _mesa_consolidate_registers(struct gl_program *prog) { GLboolean tempUsed[MAX_PROGRAM_TEMPS]; - GLuint tempMap[MAX_PROGRAM_TEMPS]; + GLint tempMap[MAX_PROGRAM_TEMPS]; GLuint tempMax = 0, i; if (dbg) { @@ -92,6 +126,10 @@ _mesa_consolidate_registers(struct gl_program *prog) memset(tempUsed, 0, sizeof(tempUsed)); + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + tempMap[i] = -1; + } + /* set tempUsed[i] if temporary [i] is referenced */ for (i = 0; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; @@ -132,26 +170,8 @@ _mesa_consolidate_registers(struct gl_program *prog) } } - /* now replace occurances of old temp indexes with new indexes */ - for (i = 0; i < prog->NumInstructions; i++) { - struct prog_instruction *inst = prog->Instructions + i; - const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); - GLuint j; - for (j = 0; j < numSrc; j++) { - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { - GLuint index = inst->SrcReg[j].Index; - assert(index <= tempMax); - assert(tempUsed[index]); - inst->SrcReg[j].Index = tempMap[index]; - } - } - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - const GLuint index = inst->DstReg.Index; - assert(tempUsed[index]); - assert(index <= tempMax); - inst->DstReg.Index = tempMap[index]; - } - } + replace_regs(prog, PROGRAM_TEMPORARY, tempMap); + if (dbg) { _mesa_printf("Optimize: End register consolidation\n"); } @@ -409,6 +429,370 @@ _mesa_remove_extra_moves(struct gl_program *prog) } +/** A live register interval */ +struct interval +{ + GLuint Reg; /** The temporary register index */ + GLuint Start, End; /** Start/end instruction numbers */ +}; + + +/** A list of register intervals */ +struct interval_list +{ + GLuint Num; + struct interval Intervals[MAX_PROGRAM_TEMPS]; +}; + + +static void +append_interval(struct interval_list *list, const struct interval *inv) +{ + list->Intervals[list->Num++] = *inv; +} + + +/** Insert interval inv into list, sorted by interval end */ +static void +insert_interval_by_end(struct interval_list *list, const struct interval *inv) +{ + /* XXX we could do a binary search insertion here since list is sorted */ + GLint i = list->Num - 1; + while (i >= 0 && list->Intervals[i].End > inv->End) { + list->Intervals[i + 1] = list->Intervals[i]; + i--; + } + list->Intervals[i + 1] = *inv; + list->Num++; + +#ifdef DEBUG + { + GLuint i; + for (i = 0; i + 1 < list->Num; i++) { + ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End); + } + } +#endif +} + + +/** Remove the given interval from the interval list */ +static void +remove_interval(struct interval_list *list, const struct interval *inv) +{ + /* XXX we could binary search since list is sorted */ + GLuint k; + for (k = 0; k < list->Num; k++) { + if (list->Intervals[k].Reg == inv->Reg) { + /* found, remove it */ + ASSERT(list->Intervals[k].Start == inv->Start); + ASSERT(list->Intervals[k].End == inv->End); + while (k < list->Num - 1) { + list->Intervals[k] = list->Intervals[k + 1]; + k++; + } + list->Num--; + return; + } + } +} + + +/** called by qsort() */ +static int +compare_start(const void *a, const void *b) +{ + const struct interval *ia = (const struct interval *) a; + const struct interval *ib = (const struct interval *) b; + if (ia->Start < ib->Start) + return -1; + else if (ia->Start > ib->Start) + return +1; + else + return 0; +} + +/** sort the interval list according to interval starts */ +static void +sort_interval_list_by_start(struct interval_list *list) +{ + qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start); +#ifdef DEBUG + { + GLuint i; + for (i = 0; i + 1 < list->Num; i++) { + ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start); + } + } +#endif +} + + +/** + * Update the intermediate interval info for register 'index' and + * instruction 'ic'. + */ +static void +update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic) +{ + ASSERT(index < MAX_PROGRAM_TEMPS); + if (intBegin[index] == -1) { + ASSERT(intEnd[index] == -1); + intBegin[index] = intEnd[index] = ic; + } + else { + intEnd[index] = ic; + } +} + + +/** + * Find the live intervals for each temporary register in the program. + * For register R, the interval [A,B] indicates that R is referenced + * from instruction A through instruction B. + * Special consideration is needed for loops and subroutines. + * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason + */ +static GLboolean +find_live_intervals(struct gl_program *prog, + struct interval_list *liveIntervals) +{ + struct loop_info + { + GLuint Start, End; /**< Start, end instructions of loop */ + }; + struct loop_info loopStack[MAX_LOOP_NESTING]; + GLuint loopStackDepth = 0; + GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS]; + GLuint i; + + /* + * Note: we'll return GL_FALSE below if we find relative indexing + * into the TEMP register file. We can't handle that yet. + * We also give up on subroutines for now. + */ + + if (dbg) { + _mesa_printf("Optimize: Begin find intervals\n"); + } + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ + intBegin[i] = intEnd[i] = -1; + } + + /* Scan instructions looking for temporary registers */ + for (i = 0; i < prog->NumInstructions; i++) { + const struct prog_instruction *inst = prog->Instructions + i; + if (inst->Opcode == OPCODE_BGNLOOP) { + loopStack[loopStackDepth].Start = i; + loopStack[loopStackDepth].End = inst->BranchTarget; + loopStackDepth++; + } + else if (inst->Opcode == OPCODE_ENDLOOP) { + loopStackDepth--; + } + else if (inst->Opcode == OPCODE_CAL) { + return GL_FALSE; + } + else { + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + const GLuint index = inst->SrcReg[j].Index; + if (inst->SrcReg[j].RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + const GLuint index = inst->DstReg.Index; + if (inst->DstReg.RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + } + + /* Build live intervals list from intermediate arrays */ + liveIntervals->Num = 0; + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (intBegin[i] >= 0) { + struct interval inv; + inv.Reg = i; + inv.Start = intBegin[i]; + inv.End = intEnd[i]; + append_interval(liveIntervals, &inv); + } + } + + /* Sort the list according to interval starts */ + sort_interval_list_by_start(liveIntervals); + + if (dbg) { + /* print interval info */ + for (i = 0; i < liveIntervals->Num; i++) { + const struct interval *inv = liveIntervals->Intervals + i; + _mesa_printf("Reg[%d] live [%d, %d]:", + inv->Reg, inv->Start, inv->End); + if (1) { + int j; + for (j = 0; j < inv->Start; j++) + _mesa_printf(" "); + for (j = inv->Start; j <= inv->End; j++) + _mesa_printf("x"); + } + _mesa_printf("\n"); + } + } + + return GL_TRUE; +} + + +static GLuint +alloc_register(GLboolean usedRegs[MAX_PROGRAM_TEMPS]) +{ + GLuint k; + for (k = 0; k < MAX_PROGRAM_TEMPS; k++) { + if (!usedRegs[k]) { + usedRegs[k] = GL_TRUE; + return k; + } + } + return MAX_PROGRAM_TEMPS; +} + + +/** + * This function implements "Linear Scan Register Allocation" to reduce + * the number of temporary registers used by the program. + * + * We compute the "live interval" for all temporary registers then + * examine the overlap of the intervals to allocate new registers. + * Basically, if two intervals do not overlap, they can use the same register. + */ +static void +_mesa_reallocate_registers(struct gl_program *prog) +{ + struct interval_list liveIntervals; + GLint registerMap[MAX_PROGRAM_TEMPS]; + GLboolean usedRegs[MAX_PROGRAM_TEMPS]; + GLuint i; + GLuint maxTemp = 0; + + if (dbg) { + _mesa_printf("Optimize: Begin live-interval register reallocation\n"); + _mesa_print_program(prog); + } + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ + registerMap[i] = -1; + usedRegs[i] = GL_FALSE; + } + + if (!find_live_intervals(prog, &liveIntervals)) { + if (dbg) + _mesa_printf("Aborting register reallocation\n"); + return; + } + + { + struct interval_list activeIntervals; + activeIntervals.Num = 0; + + /* loop over live intervals, allocating a new register for each */ + for (i = 0; i < liveIntervals.Num; i++) { + const struct interval *live = liveIntervals.Intervals + i; + + if (dbg) + _mesa_printf("Consider register %u\n", live->Reg); + + /* Expire old intervals. Intervals which have ended with respect + * to the live interval can have their remapped registers freed. + */ + { + GLint j; + for (j = 0; j < activeIntervals.Num; j++) { + const struct interval *inv = activeIntervals.Intervals + j; + if (inv->End >= live->Start) { + /* Stop now. Since the activeInterval list is sorted + * we know we don't have to go further. + */ + break; + } + else { + /* Interval 'inv' has expired */ + const GLint regNew = registerMap[inv->Reg]; + ASSERT(regNew >= 0); + + if (dbg) + _mesa_printf(" expire interval for reg %u\n", inv->Reg); + + /* remove interval j from active list */ + remove_interval(&activeIntervals, inv); + j--; /* counter-act j++ in for-loop above */ + + /* return register regNew to the free pool */ + if (dbg) + _mesa_printf(" free reg %d\n", regNew); + ASSERT(usedRegs[regNew] == GL_TRUE); + usedRegs[regNew] = GL_FALSE; + } + } + } + + /* find a free register for this live interval */ + { + const GLuint k = alloc_register(usedRegs); + if (k == MAX_PROGRAM_TEMPS) { + /* out of registers, give up */ + return; + } + registerMap[live->Reg] = k; + maxTemp = MAX2(maxTemp, k); + if (dbg) + _mesa_printf(" remap register %d -> %d\n", live->Reg, k); + } + + /* Insert this live interval into the active list which is sorted + * by increasing end points. + */ + insert_interval_by_end(&activeIntervals, live); + } + } + + if (maxTemp + 1 < liveIntervals.Num) { + /* OK, we've reduced the number of registers needed. + * Scan the program and replace all the old temporary register + * indexes with the new indexes. + */ + replace_regs(prog, PROGRAM_TEMPORARY, registerMap); + + prog->NumTemporaries = maxTemp + 1; + } + + if (dbg) { + _mesa_printf("Optimize: End live-interval register reallocation\n"); + _mesa_printf("Num temp regs before: %u after: %u\n", + liveIntervals.Num, maxTemp + 1); + _mesa_print_program(prog); + } +} + + + + /** * Apply optimizations to the given program to eliminate unnecessary * instructions, temp regs, etc. @@ -424,4 +808,6 @@ _mesa_optimize_program(GLcontext *ctx, struct gl_program *program) if (1) _mesa_consolidate_registers(program); + else /*NEW*/ + _mesa_reallocate_registers(program); } -- 2.7.4