i965: Add a SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT opcode.
authorKenneth Graunke <kenneth@whitecape.org>
Sat, 7 Nov 2015 09:37:33 +0000 (01:37 -0800)
committerKenneth Graunke <kenneth@whitecape.org>
Sat, 14 Nov 2015 00:11:02 +0000 (16:11 -0800)
We need to use per-slot offsets when there's non-uniform indexing,
as each SIMD channel could have a different index.  We want to use
them for any non-constant index (even if uniform), as it lives in
the message header instead of the descriptor, allowing us to set
offsets in GRFs rather than immediates.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_shader.cpp

index 5044982..6484484 100644 (file)
@@ -1055,13 +1055,10 @@ enum opcode {
    SHADER_OPCODE_GEN7_SCRATCH_READ,
 
    /**
-    * Gen8+ SIMD8 URB Read message.
-    *
-    * Source 0: The header register, containing URB handles (g1).
-    *
-    * Currently only supports constant offsets, in inst->offset.
+    * Gen8+ SIMD8 URB Read messages.
     */
    SHADER_OPCODE_URB_READ_SIMD8,
+   SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT,
 
    SHADER_OPCODE_URB_WRITE_SIMD8,
    SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT,
index b8d48da..80b8c8e 100644 (file)
@@ -285,6 +285,7 @@ fs_inst::is_send_from_grf() const
    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
    case SHADER_OPCODE_URB_READ_SIMD8:
+   case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
       return true;
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
       return src[1].file == VGRF;
@@ -807,6 +808,7 @@ fs_inst::regs_read(int arg) const
    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
    case SHADER_OPCODE_URB_READ_SIMD8:
+   case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
    case SHADER_OPCODE_UNTYPED_ATOMIC:
    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
index e986021..139cda3 100644 (file)
@@ -387,6 +387,9 @@ fs_generator::generate_urb_read(fs_inst *inst,
    brw_inst_set_sfid(p->devinfo, send, BRW_SFID_URB);
    brw_inst_set_urb_opcode(p->devinfo, send, GEN8_URB_OPCODE_SIMD8_READ);
 
+   if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT)
+      brw_inst_set_urb_per_slot_offset(p->devinfo, send, true);
+
    brw_inst_set_mlen(p->devinfo, send, inst->mlen);
    brw_inst_set_rlen(p->devinfo, send, inst->regs_written);
    brw_inst_set_header_present(p->devinfo, send, true);
@@ -2077,6 +2080,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
         break;
 
       case SHADER_OPCODE_URB_READ_SIMD8:
+      case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
          generate_urb_read(inst, dst, src[0]);
          break;
 
index a0c74a2..a438e18 100644 (file)
@@ -429,6 +429,8 @@ brw_instruction_name(enum opcode op)
       return "gen8_urb_write_simd8_masked_per_slot";
    case SHADER_OPCODE_URB_READ_SIMD8:
       return "urb_read_simd8";
+   case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
+      return "urb_read_simd8_per_slot";
 
    case SHADER_OPCODE_FIND_LIVE_CHANNEL:
       return "find_live_channel";