i965/vec4: teach CSE about exec_size, group and doubles
authorIago Toral Quiroga <itoral@igalia.com>
Thu, 16 Jun 2016 11:49:55 +0000 (13:49 +0200)
committerSamuel Iglesias Gonsálvez <siglesias@igalia.com>
Tue, 3 Jan 2017 10:26:51 +0000 (11:26 +0100)
v2: adapt to changes in offset()

Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_vec4_cse.cpp

index eed2478..2e65ef7 100644 (file)
@@ -130,6 +130,8 @@ instructions_match(vec4_instruction *a, vec4_instruction *b)
           a->dst.writemask == b->dst.writemask &&
           a->force_writemask_all == b->force_writemask_all &&
           a->size_written == b->size_written &&
+          a->exec_size == b->exec_size &&
+          a->group == b->group &&
           operands_match(a, b);
 }
 
@@ -181,10 +183,16 @@ vec4_visitor::opt_cse_local(bblock_t *block)
                                               regs_written(entry->generator)),
                                            NULL), inst->dst.type);
 
-               for (unsigned i = 0; i < regs_written(entry->generator); ++i) {
+               const unsigned width = entry->generator->exec_size;
+               unsigned component_size = width * type_sz(entry->tmp.type);
+               unsigned num_copy_movs =
+                  DIV_ROUND_UP(entry->generator->size_written, component_size);
+               for (unsigned i = 0; i < num_copy_movs; ++i) {
                   vec4_instruction *copy =
-                     MOV(byte_offset(entry->generator->dst, i * REG_SIZE),
-                         byte_offset(entry->tmp, i * REG_SIZE));
+                     MOV(offset(entry->generator->dst, width, i),
+                         offset(entry->tmp, width, i));
+                  copy->exec_size = width;
+                  copy->group = entry->generator->group;
                   copy->force_writemask_all =
                      entry->generator->force_writemask_all;
                   entry->generator->insert_after(block, copy);
@@ -196,11 +204,16 @@ vec4_visitor::opt_cse_local(bblock_t *block)
             /* dest <- temp */
             if (!inst->dst.is_null()) {
                assert(inst->dst.type == entry->tmp.type);
-
-               for (unsigned i = 0; i < regs_written(inst); ++i) {
+               const unsigned width = inst->exec_size;
+               unsigned component_size = width * type_sz(inst->dst.type);
+               unsigned num_copy_movs =
+                  DIV_ROUND_UP(inst->size_written, component_size);
+               for (unsigned i = 0; i < num_copy_movs; ++i) {
                   vec4_instruction *copy =
-                     MOV(byte_offset(inst->dst, i * REG_SIZE),
-                         byte_offset(entry->tmp, i * REG_SIZE));
+                     MOV(offset(inst->dst, width, i),
+                         offset(entry->tmp, width, i));
+                  copy->exec_size = inst->exec_size;
+                  copy->group = inst->group;
                   copy->force_writemask_all = inst->force_writemask_all;
                   inst->insert_before(block, copy);
                }