From 38fa18a7a325e1aca105c643dee0ae8305f4b35f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Nov 2021 14:37:57 -0600 Subject: [PATCH] intel/fs: Be more conservative in split_virtual_grfs Instead of modifying every single instruction, keep track of which VGRFs are actually split in a bit-set, and only modify the instructions that actually touch split regs. This cuts the runtime of the following Vulkan CTS on my SKL box by 45% from 3:21 to 1:51: dEQP-VK.ssbo.phys.layout.random.16bit.scalar.13 Reviewed-by: Emma Anholt Reviewed-by: Ian Romanick Part-of: --- src/intel/compiler/brw_fs.cpp | 62 ++++++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 65c0f2d..924741e 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2135,10 +2135,15 @@ fs_visitor::split_virtual_grfs() } } + /* Bitset of which registers have been split */ + bool *vgrf_has_split = new bool[num_vars]; + memset(vgrf_has_split, 0, num_vars * sizeof(*vgrf_has_split)); + int *new_virtual_grf = new int[reg_count]; int *new_reg_offset = new int[reg_count]; int reg = 0; + bool has_splits = false; for (int i = 0; i < num_vars; i++) { /* The first one should always be 0 as a quick sanity check. */ assert(split_points[reg] == false); @@ -2154,6 +2159,8 @@ fs_visitor::split_virtual_grfs() * new virtual GRF for the previous offset many registers */ if (split_points[reg]) { + has_splits = true; + vgrf_has_split[i] = true; assert(offset <= MAX_VGRF_SIZE); int grf = alloc.allocate(offset); for (int k = reg - offset; k < reg; k++) @@ -2173,40 +2180,65 @@ fs_visitor::split_virtual_grfs() } assert(reg == reg_count); + if (!has_splits) + goto cleanup; + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { if (inst->opcode == SHADER_OPCODE_UNDEF) { - const fs_builder ibld(this, block, inst); - assert(inst->size_written % REG_SIZE == 0); - unsigned reg_offset = 0; - while (reg_offset < inst->size_written / REG_SIZE) { - reg = vgrf_to_reg[inst->dst.nr] + reg_offset; - ibld.UNDEF(fs_reg(VGRF, new_virtual_grf[reg], inst->dst.type)); - reg_offset += alloc.sizes[new_virtual_grf[reg]]; + assert(inst->dst.file == VGRF); + if (vgrf_has_split[inst->dst.nr]) { + const fs_builder ibld(this, block, inst); + assert(inst->size_written % REG_SIZE == 0); + unsigned reg_offset = 0; + while (reg_offset < inst->size_written / REG_SIZE) { + reg = vgrf_to_reg[inst->dst.nr] + reg_offset; + ibld.UNDEF(fs_reg(VGRF, new_virtual_grf[reg], inst->dst.type)); + reg_offset += alloc.sizes[new_virtual_grf[reg]]; + } + inst->remove(block); + } else { + reg = vgrf_to_reg[inst->dst.nr]; + assert(new_reg_offset[reg] == 0); + assert(new_virtual_grf[reg] == (int)inst->dst.nr); } - inst->remove(block); continue; } if (inst->dst.file == VGRF) { reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE; - inst->dst.nr = new_virtual_grf[reg]; - inst->dst.offset = new_reg_offset[reg] * REG_SIZE + - inst->dst.offset % REG_SIZE; - assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]); + if (vgrf_has_split[inst->dst.nr]) { + inst->dst.nr = new_virtual_grf[reg]; + inst->dst.offset = new_reg_offset[reg] * REG_SIZE + + inst->dst.offset % REG_SIZE; + assert((unsigned)new_reg_offset[reg] < + alloc.sizes[new_virtual_grf[reg]]); + } else { + assert(new_reg_offset[reg] == inst->dst.offset / REG_SIZE); + assert(new_virtual_grf[reg] == (int)inst->dst.nr); + } } for (int i = 0; i < inst->sources; i++) { - if (inst->src[i].file == VGRF) { - reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE; + if (inst->src[i].file != VGRF) + continue; + + reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE; + if (vgrf_has_split[inst->src[i].nr]) { inst->src[i].nr = new_virtual_grf[reg]; inst->src[i].offset = new_reg_offset[reg] * REG_SIZE + inst->src[i].offset % REG_SIZE; - assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]); + assert((unsigned)new_reg_offset[reg] < + alloc.sizes[new_virtual_grf[reg]]); + } else { + assert(new_reg_offset[reg] == inst->src[i].offset / REG_SIZE); + assert(new_virtual_grf[reg] == (int)inst->src[i].nr); } } } invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL | DEPENDENCY_VARIABLES); +cleanup: delete[] split_points; + delete[] vgrf_has_split; delete[] new_virtual_grf; delete[] new_reg_offset; } -- 2.7.4