Made EOT work correctly; extended public API
author Benjamin Segovia <segovia.benjamin@gmail.com>
Wed, 28 Mar 2012 18:00:03 +0000 (18:00 +0000)
committer Keith Packard <keithp@keithp.com>
Fri, 10 Aug 2012 23:15:51 +0000 (16:15 -0700)
backend/src/gen/brw_defines.h
backend/src/gen/brw_disasm.c
backend/src/gen/brw_eu.c
backend/src/gen/brw_eu.h
backend/src/gen/brw_eu_emit.c
backend/src/gen/program.cpp
backend/src/gen/program.h
backend/src/gen/program.hpp

index e991a84..956242b 100644 (file)
 #ifndef BRW_DEFINES_H
 #define BRW_DEFINES_H
 
-/* 3D state:
- */
-#define PIPE_CONTROL_NOWRITE          0x00
-#define PIPE_CONTROL_WRITEIMMEDIATE   0x01
-#define PIPE_CONTROL_WRITEDEPTH       0x02
-#define PIPE_CONTROL_WRITETIMESTAMP   0x03
-
-#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
-#define PIPE_CONTROL_GTTWRITE_GLOBAL        0x01
-
-#define CMD_3D_PRIM                                 0x7b00 /* 3DPRIMITIVE */
-/* DW0 */
-# define GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT            10
-# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15)
-# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     (1 << 15)
-/* DW1 */
-# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
-# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     (1 << 8)
-
-#define _3DPRIM_POINTLIST         0x01
-#define _3DPRIM_LINELIST          0x02
-#define _3DPRIM_LINESTRIP         0x03
-#define _3DPRIM_TRILIST           0x04
-#define _3DPRIM_TRISTRIP          0x05
-#define _3DPRIM_TRIFAN            0x06
-#define _3DPRIM_QUADLIST          0x07
-#define _3DPRIM_QUADSTRIP         0x08
-#define _3DPRIM_LINELIST_ADJ      0x09
-#define _3DPRIM_LINESTRIP_ADJ     0x0A
-#define _3DPRIM_TRILIST_ADJ       0x0B
-#define _3DPRIM_TRISTRIP_ADJ      0x0C
-#define _3DPRIM_TRISTRIP_REVERSE  0x0D
-#define _3DPRIM_POLYGON           0x0E
-#define _3DPRIM_RECTLIST          0x0F
-#define _3DPRIM_LINELOOP          0x10
-#define _3DPRIM_POINTLIST_BF      0x11
-#define _3DPRIM_LINESTRIP_CONT    0x12
-#define _3DPRIM_LINESTRIP_BF      0x13
-#define _3DPRIM_LINESTRIP_CONT_BF 0x14
-#define _3DPRIM_TRIFAN_NOSTIPPLE  0x15
-
 #define BRW_ANISORATIO_2     0 
 #define BRW_ANISORATIO_4     1 
 #define BRW_ANISORATIO_6     2 
@@ -676,7 +635,7 @@ enum opcode {
 
 #define BRW_ARCHITECTURE_REGISTER_FILE    0
 #define BRW_GENERAL_REGISTER_FILE         1
-#define BRW_MESSAGE_REGISTER_FILE         2
+// #define BRW_MESSAGE_REGISTER_FILE         2
 #define BRW_IMMEDIATE_VALUE               3
 
 #define BRW_REGISTER_TYPE_UD  0
@@ -1482,12 +1441,8 @@ enum brw_wm_barycentric_interp_mode {
 
 #define CMD_MI_FLUSH                  0x0200
 
-
-/* Bitfields for the URB_WRITE message, DW2 of message header: */
-#define URB_WRITE_PRIM_END             0x1
-#define URB_WRITE_PRIM_START           0x2
-#define URB_WRITE_PRIM_TYPE_SHIFT      2
-
+#define BRW_DEREFERENCE_URB 0
+#define BRW_DO_NOT_DEREFERENCE_URB 1
 
 /* Maximum number of entries that can be addressed using a binding table
  * pointer of type SURFTYPE_BUFFER
index 9e97285..d0b7d58 100644 (file)
@@ -463,10 +463,6 @@ static int reg (FILE *file, uint32_t _reg_file, uint32_t _reg_nr)
 {
     int        err = 0;
 
-    /* Clear the Compr4 instruction compression bit. */
-    if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
-       _reg_nr &= ~(1 << 7);
-
     if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
         switch (_reg_nr & 0xf0) {
         case BRW_ARF_NULL:
@@ -570,10 +566,7 @@ static int dest_3src (FILE *file, struct brw_instruction *inst)
     int        err = 0;
     uint32_t reg_file;
 
-    if (inst->bits1.da3src.dest_reg_file)
-       reg_file = BRW_MESSAGE_REGISTER_FILE;
-    else
-       reg_file = BRW_GENERAL_REGISTER_FILE;
+    reg_file = BRW_GENERAL_REGISTER_FILE;
 
     err |= reg (file, reg_file, inst->bits1.da3src.dest_reg_nr);
     if (err == -1)
@@ -1302,15 +1295,8 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
         if (gen >= 6)
             err |= qtr_ctrl (file, inst);
         else {
-            if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED &&
-                opcode[inst->header.opcode].ndst > 0 &&
-                inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE &&
-                inst->bits1.da1.dest_reg_nr & (1 << 7)) {
-                format (file, " compr4");
-            } else {
-                err |= control (file, "compression control", compr_ctrl,
-                                inst->header.compression_control, &space);
-            }
+          err |= control (file, "compression control", compr_ctrl,
+              inst->header.compression_control, &space);
         }
 
         err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
index 50031fc..51001db 100644 (file)
@@ -62,7 +62,7 @@ brw_swap_cmod(uint32_t cmod)
  */
 void brw_set_predicate_control_flag_value(struct brw_compile *p, uint32_t value)
 {
-   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+//   p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
    if (value != 0xff) {
       if (value != p->flag_value) {
@@ -72,34 +72,35 @@ void brw_set_predicate_control_flag_value(struct brw_compile *p, uint32_t value)
          brw_pop_insn_state(p);
       }
 
-      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+//      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
    }   
 }
 
 void brw_set_predicate_control(struct brw_compile *p, uint32_t pc)
 {
-   p->current->header.predicate_control = pc;
+  // p->current->header.predicate_control = pc;
 }
 
 void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse)
 {
-   p->current->header.predicate_inverse = predicate_inverse;
+  // p->current->header.predicate_inverse = predicate_inverse;
 }
 
 void brw_set_conditionalmod(struct brw_compile *p, uint32_t conditional)
 {
-   p->current->header.destreg__conditionalmod = conditional;
+//  p->current->header.destreg__conditionalmod = conditional;
 }
 
 void brw_set_access_mode(struct brw_compile *p, uint32_t access_mode)
 {
-   p->current->header.access_mode = access_mode;
+  // p->current->header.access_mode = access_mode;
 }
 
 void
 brw_set_compression_control(struct brw_compile *p,
                             enum brw_compression compression_control)
 {
+#if 0
    p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED);
 
    if (p->gen >= 6) {
@@ -131,16 +132,17 @@ brw_set_compression_control(struct brw_compile *p,
    } else {
       p->current->header.compression_control = compression_control;
    }
+#endif
 }
 
 void brw_set_mask_control(struct brw_compile *p, uint32_t value)
 {
-   p->current->header.mask_control = value;
+//   p->current->header.mask_control = value;
 }
 
 void brw_set_saturate(struct brw_compile *p, uint32_t value)
 {
-   p->current->header.saturate = value;
+//   p->current->header.saturate = value;
 }
 
 #if 0
@@ -153,17 +155,21 @@ void brw_set_acc_write_control(struct brw_compile *p, uint32_t value)
 
 void brw_push_insn_state(struct brw_compile *p)
 {
+#if 0
    assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
    memcpy(p->current+1, p->current, sizeof(struct brw_instruction));
    p->compressed_stack[p->current - p->stack] = p->compressed;
    p->current++;   
+#endif
 }
 
 void brw_pop_insn_state(struct brw_compile *p)
 {
+#if 0
    assert(p->current != p->stack);
    p->current--;
    p->compressed = p->compressed_stack[p->current - p->stack];
+#endif
 }
 
 
index 29df8df..c567d4d 100644 (file)
@@ -122,7 +122,6 @@ struct brw_compile {
   /* Allow clients to push/pop instruction state */
   struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
   bool compressed_stack[BRW_EU_MAX_INSN_STACK];
-  struct brw_instruction *current;
 
   uint32_t flag_value;
   bool single_program_flow;
@@ -183,8 +182,6 @@ static inline struct brw_reg brw_reg(uint32_t file,
    struct brw_reg reg;
    if (file == BRW_GENERAL_REGISTER_FILE)
       assert(nr < BRW_MAX_GRF);
-   else if (file == BRW_MESSAGE_REGISTER_FILE)
-      assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
    else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
       assert(nr <= BRW_ARF_IP);
 
@@ -561,14 +558,6 @@ static inline struct brw_reg brw_mask_reg(uint32_t subnr)
                       subnr);
 }
 
-static inline struct brw_reg brw_message_reg(uint32_t nr)
-{
-   assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
-   return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
-                       nr,
-                       0);
-}
-
 /* This is almost always called with a numeric constant argument, so
  * make things easy to evaluate at compile time:
  */
index d8ea917..1476f8f 100644 (file)
@@ -44,63 +44,13 @@ static void guess_execution_size(struct brw_compile *p,
       insn->header.execution_size = reg.width;        /* note - definitions are compatible */
 }
 
-/**
- * Prior to Sandybridge, the SEND instruction accepted non-MRF source
- * registers, implicitly moving the operand to a message register.
- *
- * On Sandybridge, this is no longer the case.  This function performs the
- * explicit move; it should be called before emitting a SEND instruction.
- */
-void
-gen6_resolve_implied_move(struct brw_compile *p,
-                          struct brw_reg *src,
-                          uint32_t msg_reg_nr)
-{
-   if (p->gen < 6)
-      return;
-
-   if (src->file == BRW_MESSAGE_REGISTER_FILE)
-      return;
-
-   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
-      brw_push_insn_state(p);
-      brw_set_mask_control(p, BRW_MASK_DISABLE);
-      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
-              retype(*src, BRW_REGISTER_TYPE_UD));
-      brw_pop_insn_state(p);
-   }
-   *src = brw_message_reg(msg_reg_nr);
-}
-
-static void
-gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
-{
-   /* From the BSpec / ISA Reference / send - [DevIVB+]:
-    * "The send with EOT should use register space R112-R127 for <src>. This is
-    *  to enable loading of a new thread into the same slot while the message
-    *  with EOT for current thread is pending dispatch."
-    *
-    * Since we're pretending to have 16 MRFs anyway, we may as well use the
-    * registers required for messages with EOT.
-    */
-   if (p->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
-      reg->file = BRW_GENERAL_REGISTER_FILE;
-      reg->nr += GEN7_MRF_HACK_START;
-   }
-}
-
-
 void
 brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
              struct brw_reg dest)
 {
-   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
-       dest.file != BRW_MESSAGE_REGISTER_FILE)
+   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(dest.nr < 128);
 
-   gen7_convert_mrf_to_grf(p, &dest);
-
    insn->bits1.da1.dest_reg_file = dest.file;
    insn->bits1.da1.dest_reg_type = dest.type;
    insn->bits1.da1.dest_address_mode = dest.address_mode;
@@ -230,8 +180,6 @@ brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
    if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
 
-   gen7_convert_mrf_to_grf(p, &reg);
-
    validate_reg(insn, reg);
 
    insn->bits1.da1.src0_reg_file = reg.file;
@@ -306,12 +254,8 @@ void brw_set_src1(struct brw_compile *p,
                   struct brw_instruction *insn,
                   struct brw_reg reg)
 {
-   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
-
    assert(reg.nr < 128);
 
-   gen7_convert_mrf_to_grf(p, &reg);
-
    validate_reg(insn, reg);
 
    insn->bits1.da1.src1_reg_file = reg.file;
@@ -543,7 +487,6 @@ brw_next_insn(struct brw_compile *p, uint32_t opcode)
 {
    struct brw_instruction *insn;
    insn = &p->store[p->nr_insn++];
-   memcpy(insn, p->current, sizeof(*insn));
    insn->header.opcode = opcode;
    return insn;
 }
@@ -592,16 +535,13 @@ static struct brw_instruction *brw_alu3(struct brw_compile *p,
 {
    struct brw_instruction *insn = next_insn(p, opcode);
 
-   gen7_convert_mrf_to_grf(p, &dest);
-
    assert(insn->header.access_mode == BRW_ALIGN_16);
 
-   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
-          dest.file == BRW_MESSAGE_REGISTER_FILE);
+   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
    assert(dest.nr < 128);
    assert(dest.address_mode == BRW_ADDRESS_DIRECT);
    assert(dest.type = BRW_REGISTER_TYPE_F);
-   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
+   insn->bits1.da3src.dest_reg_file = 0;
    insn->bits1.da3src.dest_reg_nr = dest.nr;
    insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
    insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
@@ -803,7 +743,7 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p,
    insn->header.compression_control = BRW_COMPRESSION_NONE;
    insn->header.mask_control = BRW_MASK_DISABLE;
 
-   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+   // p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
    return insn;
 }
@@ -826,7 +766,7 @@ void brw_CMP(struct brw_compile *p,
    brw_set_src1(p, insn, src1);
 
 /*    guess_execution_size(insn, src0); */
-
+#if 0
 
    /* Make it so that future instructions will use the computed flag
     * value until brw_set_predicate_control_flag_value() is called
@@ -837,6 +777,7 @@ void brw_CMP(struct brw_compile *p,
       p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
       p->flag_value = 0xff;
    }
+#endif
 }
 
 /* Issue 'wait' instruction for n1, host could program MMIO
@@ -1088,7 +1029,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
 
       /* set message header global offset field (reg 0, element 2) */
       brw_MOV(p,
-              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+              retype(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE,
                                   mrf.nr,
                                   2), BRW_REGISTER_TYPE_UD),
               brw_imm_ud(offset));
@@ -1195,7 +1136,7 @@ brw_oword_block_read_scratch(struct brw_compile *p,
 
       /* set message header global offset field (reg 0, element 2) */
       brw_MOV(p,
-              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+              retype(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE,
                                   mrf.nr,
                                   2), BRW_REGISTER_TYPE_UD),
               brw_imm_ud(offset));
@@ -1255,7 +1196,7 @@ void brw_oword_block_read(struct brw_compile *p,
 
    /* set message header global offset field (reg 0, element 2) */
    brw_MOV(p,
-           retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+           retype(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE,
                                mrf.nr,
                                2), BRW_REGISTER_TYPE_UD),
            brw_imm_ud(offset));
@@ -1325,167 +1266,6 @@ void brw_dword_scattered_read(struct brw_compile *p,
 }
 
 /**
- * Read float[4] constant(s) from VS constant buffer.
- * For relative addressing, two float[4] constants will be read into 'dest'.
- * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
- */
-void brw_dp_READ_4_vs(struct brw_compile *p,
-                      struct brw_reg dest,
-                      uint32_t location,
-                      uint32_t bind_table_index)
-{
-   struct brw_instruction *insn;
-   uint32_t msg_reg_nr = 1;
-
-   if (p->gen >= 6)
-      location /= 16;
-
-   /* Setup MRF[1] with location/offset into const buffer */
-   brw_push_insn_state(p);
-   brw_set_access_mode(p, BRW_ALIGN_1);
-   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-   brw_set_mask_control(p, BRW_MASK_DISABLE);
-   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
-                     BRW_REGISTER_TYPE_UD),
-           brw_imm_ud(location));
-   brw_pop_insn_state(p);
-
-   insn = next_insn(p, BRW_OPCODE_SEND);
-
-   insn->header.predicate_control = BRW_PREDICATE_NONE;
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   insn->header.destreg__conditionalmod = msg_reg_nr;
-   insn->header.mask_control = BRW_MASK_DISABLE;
-
-   brw_set_dest(p, insn, dest);
-   if (p->gen >= 6) {
-      brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
-   } else {
-      brw_set_src0(p, insn, brw_null_reg());
-   }
-
-   brw_set_dp_read_message(p,
-                           insn,
-                           bind_table_index,
-                           0,
-                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
-                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
-                           1, /* msg_length */
-                           1); /* response_length (1 Oword) */
-}
-
-/**
- * Read a float[4] constant per vertex from VS constant buffer, with
- * relative addressing.
- */
-void brw_dp_READ_4_vs_relative(struct brw_compile *p,
-                               struct brw_reg dest,
-                               struct brw_reg addr_reg,
-                               uint32_t offset,
-                               uint32_t bind_table_index)
-{
-   struct brw_reg src = brw_vec8_grf(0, 0);
-   int msg_type;
-
-   /* Setup MRF[1] with offset into const buffer */
-   brw_push_insn_state(p);
-   brw_set_access_mode(p, BRW_ALIGN_1);
-   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-   brw_set_mask_control(p, BRW_MASK_DISABLE);
-   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-
-   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
-    * fields ignored.
-    */
-   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
-           addr_reg, brw_imm_d(offset));
-   brw_pop_insn_state(p);
-
-   gen6_resolve_implied_move(p, &src, 0);
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
-
-   insn->header.predicate_control = BRW_PREDICATE_NONE;
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   insn->header.destreg__conditionalmod = 0;
-   insn->header.mask_control = BRW_MASK_DISABLE;
-
-   brw_set_dest(p, insn, dest);
-   brw_set_src0(p, insn, src);
-
-   msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
-
-   brw_set_dp_read_message(p,
-                           insn,
-                           bind_table_index,
-                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
-                           msg_type,
-                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
-                           2, /* msg_length */
-                           1); /* response_length */
-}
-
-void brw_fb_WRITE(struct brw_compile *p,
-                  int dispatch_width,
-                  uint32_t msg_reg_nr,
-                  struct brw_reg src0,
-                  uint32_t binding_table_index,
-                  uint32_t msg_length,
-                  uint32_t response_length,
-                  bool eot,
-                  bool header_present)
-{
-   struct brw_instruction *insn;
-   uint32_t msg_control, msg_type;
-   struct brw_reg dest;
-
-   if (dispatch_width == 16)
-      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
-   else
-      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
-
-   if (p->gen >= 6 && binding_table_index == 0) {
-      insn = next_insn(p, BRW_OPCODE_SENDC);
-   } else {
-      insn = next_insn(p, BRW_OPCODE_SEND);
-   }
-   /* The execution mask is ignored for render target writes. */
-   insn->header.predicate_control = 0;
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-
-   if (p->gen >= 6) {
-      /* headerless version, just submit color payload */
-      src0 = brw_message_reg(msg_reg_nr);
-
-      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
-   } else {
-      insn->header.destreg__conditionalmod = msg_reg_nr;
-
-      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
-   }
-
-   if (dispatch_width == 16)
-      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
-   else
-      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
-
-   brw_set_dest(p, insn, dest);
-   brw_set_src0(p, insn, src0);
-   brw_set_dp_write_message(p,
-                            insn,
-                            binding_table_index,
-                            msg_control,
-                            msg_type,
-                            msg_length,
-                            header_present,
-                            eot, /* last render target write */
-                            response_length,
-                            eot,
-                            0 /* send_commit_msg */);
-}
-
-
-/**
  * Texture sample instruction.
  * Note: the msg_type plus msg_length values determine exactly what kind
  * of sampling operation is performed.  See volume 4, page 161 of docs.
@@ -1521,64 +1301,10 @@ void brw_SAMPLE(struct brw_compile *p,
     * instruction, so that is a guide for whether a workaround is
     * needed.
     */
-   if (writemask != WRITEMASK_XYZW) {
-      uint32_t dst_offset = 0;
-      uint32_t i, newmask = 0, len = 0;
-
-      for (i = 0; i < 4; i++) {
-         if (writemask & (1<<i))
-            break;
-         dst_offset += 2;
-      }
-      for (; i < 4; i++) {
-         if (!(writemask & (1<<i)))
-            break;
-         newmask |= 1<<i;
-         len++;
-      }
-
-      if (newmask != writemask) {
-         need_stall = 1;
-         /* printf("need stall %x %x\n", newmask , writemask); */
-      }
-      else {
-         bool dispatch_16 = false;
-
-         struct brw_reg m1 = brw_message_reg(msg_reg_nr);
-
-         guess_execution_size(p, p->current, dest);
-         if (p->current->header.execution_size == BRW_EXECUTE_16)
-            dispatch_16 = true;
-
-         newmask = ~newmask & WRITEMASK_XYZW;
-
-         brw_push_insn_state(p);
-
-         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-         brw_set_mask_control(p, BRW_MASK_DISABLE);
-
-         brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
-                 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
-           brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); 
-
-         brw_pop_insn_state(p);
-
-           src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 
-         dest = offset(dest, dst_offset);
-
-         /* For 16-wide dispatch, masked channels are skipped in the
-          * response.  For 8-wide, masked channels still take up slots,
-          * and are just not written to.
-          */
-         if (dispatch_16)
-            response_length = len * 2;
-      }
-   }
+   assert (writemask == WRITEMASK_XYZW);
 
    {
       struct brw_instruction *insn;
-   
-      gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
       insn = next_insn(p, BRW_OPCODE_SEND);
       insn->header.predicate_control = 0; /* XXX */
@@ -1618,21 +1344,15 @@ brw_EOT(struct brw_compile *p, uint32_t msg_nr)
 {
   struct brw_instruction *insn = NULL;
 
-  brw_MOV(p, brw_message_reg(msg_nr), brw_vec8_grf(0,0));
+  insn = brw_MOV(p, brw_vec8_grf(msg_nr,0), brw_vec8_grf(0,0));
+  insn->header.mask_control = BRW_MASK_DISABLE;
   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, brw_null_reg());
-  brw_set_src0(p, insn, brw_message_reg(msg_nr));
+  brw_set_src0(p, insn, brw_vec8_grf(msg_nr,0));
   brw_set_src1(p, insn, brw_imm_ud(0));
-
-  insn->header.execution_size = BRW_EXECUTE_1;
-  insn->header.predicate_control = 0; /* XXX */
+  insn->header.execution_size = BRW_EXECUTE_8;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
-  insn->header.execution_size = BRW_EXECUTE_1;
-  insn->bits3.spawner_gen5.opcode = 0;
-  insn->bits3.spawner_gen5.request = 1;
-  insn->bits3.spawner_gen5.resource = 0;
-  insn->bits3.spawner_gen5.header = 0;
-  insn->bits3.spawner_gen5.response_length = 0;
+  insn->bits3.spawner_gen5.resource = BRW_DO_NOT_DEREFERENCE_URB;
   insn->bits3.spawner_gen5.msg_length = 1;
   insn->bits3.spawner_gen5.end_of_thread = 1;
   insn->header.destreg__conditionalmod = BRW_SFID_THREAD_SPAWNER;
index d0e61ef..6792251 100644 (file)
@@ -82,6 +82,7 @@ namespace gen {
       kernel->insns = GBE_NEW_ARRAY(brw_instruction, kernel->insnNum);
       std::memcpy(kernel->insns, p->store, kernel->insnNum * sizeof(brw_instruction));
       GBE_FREE(p);
+      kernels.insert(std::make_pair(name, kernel));
     }
 
     return true;
@@ -161,6 +162,13 @@ const GenKernel *GenProgramGetKernel(const GenProgram *genProgram, uint32_t ID)
 }
 
 GBE_EXPORT_SYMBOL
+const char *GenKernelGetName(const GenKernel *genKernel) {
+  if (genKernel == NULL) return NULL;
+  const gbe::gen::Kernel *kernel = (const gbe::gen::Kernel*) genKernel;
+  return kernel->getName();
+}
+
+GBE_EXPORT_SYMBOL
 const char *GenKernelGetCode(const GenKernel *genKernel) {
   if (genKernel == NULL) return NULL;
   const gbe::gen::Kernel *kernel = (const gbe::gen::Kernel*) genKernel;
@@ -195,3 +203,13 @@ GenArgType GenKernelGetArgType(const GenKernel *genKernel, uint32_t argID) {
   return kernel->getArgType(argID);
 }
 
+GBE_EXPORT_SYMBOL
+uint32_t GenKernelGetSIMDWidth(const GenKernel *kernel) {
+  return 16u;
+}
+
+GBE_EXPORT_SYMBOL
+uint32_t GenKernelGetRequiredWorkGroupSize(const GenKernel *kernel, uint32_t dim) {
+  return 0u;
+}
+
index 97e4e71..9223ced 100644 (file)
@@ -73,7 +73,10 @@ const GenKernel *GenProgramGetKernelByName(const GenProgram *program, const char
 /*! Get the kernel from its ID */
 const GenKernel *GenProgramGetKernel(const GenProgram *program, uint32_t ID);
 
-/*! Get the Gen ISA source code */
+/*! Get the Gen kernel name */
+const char *GenKernelGetName(const GenKernel *kernel);
+
+/*! Get the Gen kernel source code */
 const char *GenKernelGetCode(const GenKernel *kernel);
 
 /*! Get the size of the source code */
@@ -88,6 +91,14 @@ uint32_t GenKernelGetArgSize(const GenKernel *kernel, uint32_t argID);
 /*! Get the type of the given argument */
 enum GenArgType GenKernelGetArgType(const GenKernel *kernel, uint32_t argID);
 
+/*! Get the simd width for the kernel */
+uint32_t GenKernelGetSIMDWidth(const GenKernel *kernel);
+
+/*! Indicates if a work group size is required. Return the required width or 0
+ *  if none
+ */
+uint32_t GenKernelGetRequiredWorkGroupSize(const GenKernel *kernel, uint32_t dim);
+
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */
index 933529a..b5351fc 100644 (file)
@@ -60,6 +60,8 @@ namespace gen {
     INLINE size_t getCodeSize(void) const {
       return insnNum * sizeof(brw_instruction);
     }
+    /*! Get the kernel name */
+    INLINE const char *getName(void) const { return name.c_str(); }
     /*! Return the number of arguments for the kernel call */
     INLINE uint32_t getArgNum(void) const { return argNum; }
     /*! Return the size of the given argument */
@@ -78,7 +80,7 @@ namespace gen {
     }
   private:
     friend class Program;    //!< Owns the kernels
-    std::string name;        //!< Kernel name
+    const std::string name;  //!< Kernel name
     KernelArgument *args;    //!< Each argument
     brw_instruction *insns;  //!< Instruction stream
     uint32_t argNum;         //!< Number of function arguments