i965/fs: Add wrapper functions for fs_inst::regs_read and ::regs_written.
author: Francisco Jerez <currojerez@riseup.net>
Wed, 7 Sep 2016 23:59:35 +0000 (16:59 -0700)
committer: Francisco Jerez <currojerez@riseup.net>
Wed, 14 Sep 2016 21:50:53 +0000 (14:50 -0700)
This is in preparation for dropping fs_inst::regs_read and
::regs_written in favor of more accurate alternatives expressed in
byte units.  The main reason these wrappers are useful is that a
number of optimization passes implement dataflow analysis with
register granularity, so these helpers will come in handy once we've
switched register offsets and sizes to the byte representation.  The
wrapper functions will also make sure that GRF misalignment (currently
neglected by most of the back-end) is taken into account correctly in
the calculation of regs_read and regs_written.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_cse.cpp
src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
src/mesa/drivers/dri/i965/brw_fs_validate.cpp
src/mesa/drivers/dri/i965/brw_ir_fs.h
src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp

index 04f0457..802aa9f 100644 (file)
@@ -1772,13 +1772,13 @@ fs_visitor::split_virtual_grfs()
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
       if (inst->dst.file == VGRF) {
          int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE;
-         for (int j = 1; j < inst->regs_written; j++)
+         for (unsigned j = 1; j < regs_written(inst); j++)
             split_points[reg + j] = false;
       }
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == VGRF) {
             int reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE;
-            for (int j = 1; j < inst->regs_read(i); j++)
+            for (unsigned j = 1; j < regs_read(inst, i); j++)
                split_points[reg + j] = false;
          }
       }
@@ -2611,7 +2611,7 @@ fs_visitor::opt_register_renaming()
          if (remap[dst] == -1) {
             remap[dst] = dst;
          } else {
-            remap[dst] = alloc.allocate(inst->regs_written);
+            remap[dst] = alloc.allocate(regs_written(inst));
             inst->dst.nr = remap[dst];
             progress = true;
          }
@@ -2727,7 +2727,7 @@ fs_visitor::compute_to_mrf()
        * regs_left bitset keeps track of the registers we haven't yet found a
        * generating instruction for.
        */
-      unsigned regs_left = (1 << inst->regs_read(0)) - 1;
+      unsigned regs_left = (1 << regs_read(inst, 0)) - 1;
 
       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
          if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
@@ -2819,7 +2819,7 @@ fs_visitor::compute_to_mrf()
       /* Found all generating instructions of our MRF's source value, so it
        * should be safe to rewrite them to point to the MRF directly.
        */
-      regs_left = (1 << inst->regs_read(0)) - 1;
+      regs_left = (1 << regs_read(inst, 0)) - 1;
 
       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
          if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
@@ -3086,7 +3086,7 @@ void
 fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
                                                         fs_inst *inst)
 {
-   int write_len = inst->regs_written;
+   int write_len = regs_written(inst);
    int first_write_grf = inst->dst.nr;
    bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
    assert(write_len < (int)sizeof(needs_dep) - 1);
@@ -3119,7 +3119,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
        * dependency has more latency than a MOV.
        */
       if (scan_inst->dst.file == VGRF) {
-         for (int i = 0; i < scan_inst->regs_written; i++) {
+         for (unsigned i = 0; i < regs_written(scan_inst); i++) {
             int reg = scan_inst->dst.nr + i;
 
             if (reg >= first_write_grf &&
@@ -3157,7 +3157,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
 void
 fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_inst *inst)
 {
-   int write_len = inst->regs_written;
+   int write_len = regs_written(inst);
    int first_write_grf = inst->dst.nr;
    bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
    assert(write_len < (int)sizeof(needs_dep) - 1);
@@ -3800,7 +3800,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
       /* Send from the GRF */
       fs_reg payload = fs_reg(VGRF, -1, BRW_REGISTER_TYPE_F);
       load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size);
-      payload.nr = bld.shader->alloc.allocate(load->regs_written);
+      payload.nr = bld.shader->alloc.allocate(regs_written(load));
       load->dst = payload;
 
       inst->src[0] = payload;
@@ -3821,7 +3821,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
    }
 
    inst->opcode = FS_OPCODE_FB_WRITE;
-   inst->mlen = load->regs_written;
+   inst->mlen = regs_written(load);
    inst->header_size = header_size;
 }
 
@@ -4069,7 +4069,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
                                 unsigned grad_components)
 {
    const gen_device_info *devinfo = bld.shader->devinfo;
-   int reg_width = bld.dispatch_width() / 8;
+   unsigned reg_width = bld.dispatch_width() / 8;
    unsigned header_size = 0, length = 0;
    fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE];
    for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
@@ -4097,9 +4097,9 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
        * and we have an explicit header, we need to set up the sampler
        * writemask.  It's reversed from normal: 1 means "don't write".
        */
-      if (!inst->eot && inst->regs_written != 4 * reg_width) {
-         assert((inst->regs_written % reg_width) == 0);
-         unsigned mask = ~((1 << (inst->regs_written / reg_width)) - 1) & 0xf;
+      if (!inst->eot && regs_written(inst) != 4 * reg_width) {
+         assert(regs_written(inst) % reg_width == 0);
+         unsigned mask = ~((1 << (regs_written(inst) / reg_width)) - 1) & 0xf;
          inst->offset |= mask << 12;
       }
    }
index 0c65c5b..4744142 100644 (file)
@@ -199,8 +199,8 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate)
 static void
 create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
 {
-   int written = inst->regs_written;
-   int dst_width =
+   unsigned written = regs_written(inst);
+   unsigned dst_width =
       DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE);
    fs_inst *copy;
 
@@ -234,7 +234,7 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
       copy->force_writemask_all = inst->force_writemask_all;
       copy->src[0].negate = negate;
    }
-   assert(copy->regs_written == written);
+   assert(regs_written(copy) == written);
 }
 
 bool
@@ -284,7 +284,7 @@ fs_visitor::opt_cse_local(bblock_t *block)
             if (no_existing_temp && !entry->generator->dst.is_null()) {
                const fs_builder ibld = fs_builder(this, block, entry->generator)
                                        .at(block, entry->generator->next);
-               int written = entry->generator->regs_written;
+               int written = regs_written(entry->generator);
 
                entry->tmp = fs_reg(VGRF, alloc.allocate(written),
                                    entry->generator->dst.type);
index 45f5c5e..4558bd4 100644 (file)
@@ -55,12 +55,12 @@ fs_visitor::dead_code_eliminate()
          if (inst->dst.file == VGRF && !inst->has_side_effects()) {
             bool result_live = false;
 
-            if (inst->regs_written == 1) {
+            if (regs_written(inst) == 1) {
                int var = live_intervals->var_from_reg(inst->dst);
                result_live = BITSET_TEST(live, var);
             } else {
                int var = live_intervals->var_from_reg(inst->dst);
-               for (int i = 0; i < inst->regs_written; i++) {
+               for (unsigned i = 0; i < regs_written(inst); i++) {
                   result_live = result_live || BITSET_TEST(live, var + i);
                }
             }
@@ -96,7 +96,7 @@ fs_visitor::dead_code_eliminate()
          if (inst->dst.file == VGRF) {
             if (!inst->is_partial_write()) {
                int var = live_intervals->var_from_reg(inst->dst);
-               for (int i = 0; i < inst->regs_written; i++) {
+               for (unsigned i = 0; i < regs_written(inst); i++) {
                   BITSET_CLEAR(live, var + i);
                }
             }
@@ -114,7 +114,7 @@ fs_visitor::dead_code_eliminate()
             if (inst->src[i].file == VGRF) {
                int var = live_intervals->var_from_reg(inst->src[i]);
 
-               for (int j = 0; j < inst->regs_read(i); j++) {
+               for (unsigned j = 0; j < regs_read(inst, i); j++) {
                   BITSET_SET(live, var + j);
                }
             }
index 02dc777..a6c98e3 100644 (file)
@@ -118,7 +118,7 @@ fs_live_variables::setup_def_use()
             if (reg.file != VGRF)
                continue;
 
-            for (int j = 0; j < inst->regs_read(i); j++) {
+            for (unsigned j = 0; j < regs_read(inst, i); j++) {
                setup_one_read(bd, inst, ip, reg);
                reg.offset += REG_SIZE;
             }
@@ -129,7 +129,7 @@ fs_live_variables::setup_def_use()
          /* Set def[] for this instruction */
          if (inst->dst.file == VGRF) {
             fs_reg reg = inst->dst;
-            for (int j = 0; j < inst->regs_written; j++) {
+            for (unsigned j = 0; j < regs_written(inst); j++) {
                setup_one_write(bd, inst, ip, reg);
                reg.offset += REG_SIZE;
             }
index 82adaa3..572735a 100644 (file)
@@ -362,9 +362,9 @@ void fs_visitor::calculate_payload_ranges(int payload_node_count,
             if (node_nr >= payload_node_count)
                continue;
 
-            for (int j = 0; j < inst->regs_read(i); j++) {
+            for (unsigned j = 0; j < regs_read(inst, i); j++) {
                payload_last_use_ip[node_nr + j] = use_ip;
-               assert(node_nr + j < payload_node_count);
+               assert(node_nr + j < unsigned(payload_node_count));
             }
          }
       }
@@ -903,10 +903,10 @@ fs_visitor::spill_reg(int spill_reg)
       for (unsigned int i = 0; i < inst->sources; i++) {
         if (inst->src[i].file == VGRF &&
              inst->src[i].nr == spill_reg) {
-            int regs_read = inst->regs_read(i);
+            int count = regs_read(inst, i);
             int subset_spill_offset = spill_offset +
                ROUND_DOWN_TO(inst->src[i].offset, REG_SIZE);
-            fs_reg unspill_dst(VGRF, alloc.allocate(regs_read));
+            fs_reg unspill_dst(VGRF, alloc.allocate(count));
 
             inst->src[i].nr = unspill_dst.nr;
             inst->src[i].offset %= REG_SIZE;
@@ -916,7 +916,7 @@ fs_visitor::spill_reg(int spill_reg)
              * hardware) up to the maximum supported block size.
              */
             const unsigned width =
-               MIN2(32, 1u << (ffs(MAX2(1, regs_read) * 8) - 1));
+               MIN2(32, 1u << (ffs(MAX2(1, count) * 8) - 1));
 
             /* Set exec_all() on unspill messages under the (rather
              * pessimistic) assumption that there is no one-to-one
@@ -926,7 +926,7 @@ fs_visitor::spill_reg(int spill_reg)
              * unspill destination is a block-local temporary.
              */
             emit_unspill(ibld.exec_all().group(width, 0),
-                         unspill_dst, subset_spill_offset, regs_read);
+                         unspill_dst, subset_spill_offset, count);
         }
       }
 
@@ -934,7 +934,7 @@ fs_visitor::spill_reg(int spill_reg)
           inst->dst.nr == spill_reg) {
          int subset_spill_offset = spill_offset +
             ROUND_DOWN_TO(inst->dst.offset, REG_SIZE);
-         fs_reg spill_src(VGRF, alloc.allocate(inst->regs_written));
+         fs_reg spill_src(VGRF, alloc.allocate(regs_written(inst)));
 
          inst->dst.nr = spill_src.nr;
          inst->dst.offset %= REG_SIZE;
@@ -971,19 +971,19 @@ fs_visitor::spill_reg(int spill_reg)
          const fs_builder ubld = ibld.exec_all(!per_channel).group(width, 0);
 
         /* If our write is going to affect just part of the
-          * inst->regs_written(), then we need to unspill the destination
-          * since we write back out all of the regs_written().  If the
-          * original instruction had force_writemask_all set and is not a
-          * partial write, there should be no need for the unspill since the
+          * regs_written(inst), then we need to unspill the destination since
+          * we write back out all of the regs_written().  If the original
+          * instruction had force_writemask_all set and is not a partial
+          * write, there should be no need for the unspill since the
           * instruction will be overwriting the whole destination in any case.
          */
          if (inst->is_partial_write() ||
              (!inst->force_writemask_all && !per_channel))
             emit_unspill(ubld, spill_src, subset_spill_offset,
-                         inst->regs_written);
+                         regs_written(inst));
 
          emit_spill(ubld.at(block, inst->next), spill_src,
-                    subset_spill_offset, inst->regs_written);
+                    subset_spill_offset, regs_written(inst));
       }
    }
 
index 651c136..3dd0fbf 100644 (file)
@@ -190,7 +190,7 @@ fs_visitor::register_coalesce()
             dst_reg_offset[i] = i;
          }
          mov[0] = inst;
-         channels_remaining -= inst->regs_written;
+         channels_remaining -= regs_written(inst);
       } else {
          const int offset = inst->src[0].offset / REG_SIZE;
          if (mov[offset]) {
@@ -207,7 +207,7 @@ fs_visitor::register_coalesce()
          if (inst->regs_written > 1)
             dst_reg_offset[offset + 1] = inst->dst.offset / REG_SIZE + 1;
          mov[offset] = inst;
-         channels_remaining -= inst->regs_written;
+         channels_remaining -= regs_written(inst);
       }
 
       if (channels_remaining)
index 10ad7c3..676942c 100644 (file)
@@ -43,14 +43,14 @@ fs_visitor::validate()
 {
    foreach_block_and_inst (block, fs_inst, inst, cfg) {
       if (inst->dst.file == VGRF) {
-         fsv_assert(inst->dst.offset / REG_SIZE + inst->regs_written <=
+         fsv_assert(inst->dst.offset / REG_SIZE + regs_written(inst) <=
                     alloc.sizes[inst->dst.nr]);
       }
 
       for (unsigned i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == VGRF) {
-            fsv_assert(inst->src[i].offset / REG_SIZE + inst->regs_read(i) <=
-                       (int)alloc.sizes[inst->src[i].nr]);
+            fsv_assert(inst->src[i].offset / REG_SIZE + regs_read(inst, i) <=
+                       alloc.sizes[inst->src[i].nr]);
          }
       }
    }
index 19ef242..de08a69 100644 (file)
@@ -411,4 +411,30 @@ set_saturate(bool saturate, fs_inst *inst)
    return inst;
 }
 
+/**
+ * Wrapper around fs_inst::regs_written: number of dataflow registers written
+ * by the instruction (fully or partially), counted from
+ * 'floor(reg_offset(inst->dst) / register_size)'.  The somewhat arbitrary
+ * register size unit is 4B for UNIFORM and IMM files, 32B for all others.
+ */
+inline unsigned
+regs_written(const fs_inst *inst)
+{
+   /* XXX - Plain pass-through for now: misaligned offsets are ignored. */
+   return inst->regs_written;
+}
+
+/**
+ * Wrapper around fs_inst::regs_read(): number of dataflow registers read by
+ * the instruction through source 'i' (fully or partially), counted from
+ * 'floor(reg_offset(inst->src[i]) / register_size)'.  The register size unit
+ * is (somewhat arbitrarily) 4B for UNIFORM and IMM files, 32B for all others.
+ */
+inline unsigned
+regs_read(const fs_inst *inst, unsigned i)
+{
+   /* XXX - Plain pass-through for now: misaligned offsets are ignored. */
+   return inst->regs_read(i);
+}
+
 #endif
index dde7554..0d3a07c 100644 (file)
@@ -620,7 +620,7 @@ fs_instruction_scheduler::count_reads_remaining(backend_instruction *be)
          if (inst->src[i].nr >= hw_reg_count)
             continue;
 
-         for (int j = 0; j < inst->regs_read(i); j++)
+         for (unsigned j = 0; j < regs_read(inst, i); j++)
             hw_reads_remaining[inst->src[i].nr + j]++;
       }
    }
@@ -702,7 +702,7 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be)
          reads_remaining[inst->src[i].nr]--;
       } else if (inst->src[i].file == FIXED_GRF &&
                  inst->src[i].nr < hw_reg_count) {
-         for (int off = 0; off < inst->regs_read(i); off++)
+         for (unsigned off = 0; off < regs_read(inst, i); off++)
             hw_reads_remaining[inst->src[i].nr + off]--;
       }
    }
@@ -731,7 +731,7 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
 
       if (inst->src[i].file == FIXED_GRF &&
           inst->src[i].nr < hw_reg_count) {
-         for (int off = 0; off < inst->regs_read(i); off++) {
+         for (unsigned off = 0; off < regs_read(inst, i); off++) {
             int reg = inst->src[i].nr + off;
             if (!BITSET_TEST(hw_liveout[block_idx], reg) &&
                 hw_reads_remaining[reg] == 1) {
@@ -1004,17 +1004,17 @@ fs_instruction_scheduler::calculate_deps()
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == VGRF) {
             if (post_reg_alloc) {
-               for (int r = 0; r < inst->regs_read(i); r++)
+               for (unsigned r = 0; r < regs_read(inst, i); r++)
                   add_dep(last_grf_write[inst->src[i].nr + r], n);
             } else {
-               for (int r = 0; r < inst->regs_read(i); r++) {
+               for (unsigned r = 0; r < regs_read(inst, i); r++) {
                   add_dep(last_grf_write[inst->src[i].nr * 16 +
                                          inst->src[i].offset / REG_SIZE + r], n);
                }
             }
          } else if (inst->src[i].file == FIXED_GRF) {
             if (post_reg_alloc) {
-               for (int r = 0; r < inst->regs_read(i); r++)
+               for (unsigned r = 0; r < regs_read(inst, i); r++)
                   add_dep(last_grf_write[inst->src[i].nr + r], n);
             } else {
                add_dep(last_fixed_grf_write, n);
@@ -1052,12 +1052,12 @@ fs_instruction_scheduler::calculate_deps()
       /* write-after-write deps. */
       if (inst->dst.file == VGRF) {
          if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written; r++) {
+            for (unsigned r = 0; r < regs_written(inst); r++) {
                add_dep(last_grf_write[inst->dst.nr + r], n);
                last_grf_write[inst->dst.nr + r] = n;
             }
          } else {
-            for (int r = 0; r < inst->regs_written; r++) {
+            for (unsigned r = 0; r < regs_written(inst); r++) {
                add_dep(last_grf_write[inst->dst.nr * 16 +
                                       inst->dst.offset / REG_SIZE + r], n);
                last_grf_write[inst->dst.nr * 16 +
@@ -1079,7 +1079,7 @@ fs_instruction_scheduler::calculate_deps()
          }
       } else if (inst->dst.file == FIXED_GRF) {
          if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written; r++)
+            for (unsigned r = 0; r < regs_written(inst); r++)
                last_grf_write[inst->dst.nr + r] = n;
          } else {
             last_fixed_grf_write = n;
@@ -1130,17 +1130,17 @@ fs_instruction_scheduler::calculate_deps()
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == VGRF) {
             if (post_reg_alloc) {
-               for (int r = 0; r < inst->regs_read(i); r++)
+               for (unsigned r = 0; r < regs_read(inst, i); r++)
                   add_dep(n, last_grf_write[inst->src[i].nr + r], 0);
             } else {
-               for (int r = 0; r < inst->regs_read(i); r++) {
+               for (unsigned r = 0; r < regs_read(inst, i); r++) {
                   add_dep(n, last_grf_write[inst->src[i].nr * 16 +
                                             inst->src[i].offset / REG_SIZE + r], 0);
                }
             }
          } else if (inst->src[i].file == FIXED_GRF) {
             if (post_reg_alloc) {
-               for (int r = 0; r < inst->regs_read(i); r++)
+               for (unsigned r = 0; r < regs_read(inst, i); r++)
                   add_dep(n, last_grf_write[inst->src[i].nr + r], 0);
             } else {
                add_dep(n, last_fixed_grf_write, 0);
@@ -1180,10 +1180,10 @@ fs_instruction_scheduler::calculate_deps()
        */
       if (inst->dst.file == VGRF) {
          if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written; r++)
+            for (unsigned r = 0; r < regs_written(inst); r++)
                last_grf_write[inst->dst.nr + r] = n;
          } else {
-            for (int r = 0; r < inst->regs_written; r++) {
+            for (unsigned r = 0; r < regs_written(inst); r++) {
                last_grf_write[inst->dst.nr * 16 +
                               inst->dst.offset / REG_SIZE + r] = n;
             }
@@ -1203,7 +1203,7 @@ fs_instruction_scheduler::calculate_deps()
          }
       } else if (inst->dst.file == FIXED_GRF) {
          if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written; r++)
+            for (unsigned r = 0; r < regs_written(inst); r++)
                last_grf_write[inst->dst.nr + r] = n;
          } else {
             last_fixed_grf_write = n;