r600g/sb: Support LDS ops in SB bytecode I/O
author: Glenn Kennard <glenn.kennard@gmail.com>
Mon, 30 Nov 2015 00:10:23 +0000 (10:10 +1000)
committer: Dave Airlie <airlied@redhat.com>
Sun, 6 Dec 2015 23:58:59 +0000 (09:58 +1000)
This only adds support for the LDS ops to the SB bytecode builder (writer), decoder (reader), and dump code.

Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/gallium/drivers/r600/sb/sb_bc.h
src/gallium/drivers/r600/sb/sb_bc_builder.cpp
src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
src/gallium/drivers/r600/sb/sb_bc_dump.cpp

index b0b12ce..b23d482 100644 (file)
@@ -518,6 +518,8 @@ struct bc_alu {
 
        unsigned slot:3;
 
+       unsigned lds_idx_offset:6;
+
        alu_op_flags slot_flags;
 
        void set_op(unsigned op) {
index 55e2a85..5dec169 100644 (file)
@@ -371,6 +371,37 @@ int bc_builder::build_alu(alu_node* n) {
        const bc_alu &bc = n->bc;
        const alu_op_info *aop = bc.op_ptr;
 
+       if (n->bc.op_ptr->flags & AF_LDS) {
+               assert(ctx.is_egcm());
+               bb << ALU_WORD0_LDS_IDX_OP_EGCM()
+                       .SRC0_SEL(bc.src[0].sel)
+                       .SRC0_REL(bc.src[0].rel)
+                       .SRC0_CHAN(bc.src[0].chan)
+                       .IDX_OFFSET_4((bc.lds_idx_offset >> 4) & 1)
+                       .SRC1_SEL(bc.src[1].sel)
+                       .SRC1_REL(bc.src[1].rel)
+                       .SRC1_CHAN(bc.src[1].chan)
+                       .IDX_OFFSET_5((bc.lds_idx_offset >> 5) & 1)
+                       .INDEX_MODE(bc.index_mode)
+                       .PRED_SEL(bc.pred_sel)
+                       .LAST(bc.last);
+
+               bb << ALU_WORD1_LDS_IDX_OP_EGCM()
+                       .SRC2_SEL(bc.src[2].sel)
+                       .SRC2_REL(bc.src[2].rel)
+                       .SRC2_CHAN(bc.src[2].chan)
+                       .IDX_OFFSET_1((bc.lds_idx_offset >> 1) & 1)
+                       .ALU_INST(ctx.alu_opcode(ALU_OP3_LDS_IDX_OP))
+                       .BANK_SWIZZLE(bc.bank_swizzle)
+                       .LDS_OP((bc.op_ptr->opcode[1] >> 8) & 0xff)
+                       .IDX_OFFSET_0((bc.lds_idx_offset >> 0) & 1)
+                       .IDX_OFFSET_2((bc.lds_idx_offset >> 2) & 1)
+                       .DST_CHAN(bc.dst_chan)
+                       .IDX_OFFSET_3((bc.lds_idx_offset >> 3) & 1);
+
+               return 0;
+       }
+
        bb << ALU_WORD0_ALL()
                        .INDEX_MODE(bc.index_mode)
                        .LAST(bc.last)
index 48cbb33..1832e2d 100644 (file)
@@ -310,16 +310,53 @@ int bc_decoder::decode_alu(unsigned & i, bc_alu& bc) {
                ALU_WORD1_OP3_ALL w1(dw1);
                bc.set_op(r600_isa_alu_by_opcode(ctx.isa, w1.get_ALU_INST(), 1));
 
-               bc.bank_swizzle = w1.get_BANK_SWIZZLE();
-               bc.clamp = w1.get_CLAMP();
-               bc.dst_chan = w1.get_DST_CHAN();
-               bc.dst_gpr = w1.get_DST_GPR();
-               bc.dst_rel = w1.get_DST_REL();
+               if (bc.op == ALU_OP3_LDS_IDX_OP) {
+                       ALU_WORD0_LDS_IDX_OP_EGCM iw0(dw0);
+                       ALU_WORD1_LDS_IDX_OP_EGCM iw1(dw1);
+                       bc.index_mode = iw0.get_INDEX_MODE();
+                       bc.last = iw0.get_LAST();
+                       bc.pred_sel = iw0.get_PRED_SEL();
+                       bc.src[0].chan = iw0.get_SRC0_CHAN();
+                       bc.src[0].sel = iw0.get_SRC0_SEL();
+                       bc.src[0].rel = iw0.get_SRC0_REL();
+
+                       bc.src[1].chan = iw0.get_SRC1_CHAN();
+                       bc.src[1].sel = iw0.get_SRC1_SEL();
+                       bc.src[1].rel = iw0.get_SRC1_REL();
+
+                       bc.bank_swizzle = iw1.get_BANK_SWIZZLE();
+                       bc.src[2].chan = iw1.get_SRC2_CHAN();
+                       bc.src[2].sel = iw1.get_SRC2_SEL();
+                       bc.src[2].rel = iw1.get_SRC2_REL();
+                       bc.dst_chan = iw1.get_DST_CHAN();
+                       // TODO: clean up
+                       for (size_t k = 0; k < sizeof(alu_op_table) / sizeof(alu_op_table[0]); k++) {
+                               if (((alu_op_table[k].opcode[1] >> 8) & 0xff) == iw1.get_LDS_OP()) {
+                                       bc.op_ptr = &alu_op_table[k];
+                                       bc.op = k;
+                                       break;
+                               }
+                       }
+                       bc.lds_idx_offset =
+                               (iw0.get_IDX_OFFSET_4() << 4) |
+                               (iw0.get_IDX_OFFSET_5() << 5) |
+                               (iw1.get_IDX_OFFSET_1() << 1) |
+                               (iw1.get_IDX_OFFSET_0() << 0) |
+                               (iw1.get_IDX_OFFSET_2() << 2) |
+                               (iw1.get_IDX_OFFSET_3() << 3);
+               }
+               else {
+                       bc.bank_swizzle = w1.get_BANK_SWIZZLE();
+                       bc.clamp = w1.get_CLAMP();
+                       bc.dst_chan = w1.get_DST_CHAN();
+                       bc.dst_gpr = w1.get_DST_GPR();
+                       bc.dst_rel = w1.get_DST_REL();
 
-               bc.src[2].chan = w1.get_SRC2_CHAN();
-               bc.src[2].sel = w1.get_SRC2_SEL();
-               bc.src[2].neg = w1.get_SRC2_NEG();
-               bc.src[2].rel = w1.get_SRC2_REL();
+                       bc.src[2].chan = w1.get_SRC2_CHAN();
+                       bc.src[2].sel = w1.get_SRC2_SEL();
+                       bc.src[2].neg = w1.get_SRC2_NEG();
+                       bc.src[2].rel = w1.get_SRC2_REL();
+               }
 
        } else { // op2
                if (ctx.is_r600()) {
index 3c051ad..788450b 100644 (file)
@@ -280,6 +280,28 @@ static void print_src(sb_ostream &s, bc_alu &alu, unsigned idx)
                need_sel = 0;
                need_chan = 0;
                switch (sel) {
+               case ALU_SRC_LDS_OQ_A:
+                       s << "LDS_OQ_A";
+                       need_chan = 1;
+                       break;
+               case ALU_SRC_LDS_OQ_B:
+                       s << "LDS_OQ_B";
+                       need_chan = 1;
+                       break;
+               case ALU_SRC_LDS_OQ_A_POP:
+                       s << "LDS_OQ_A_POP";
+                       need_chan = 1;
+                       break;
+               case ALU_SRC_LDS_OQ_B_POP:
+                       s << "LDS_OQ_B_POP";
+                       need_chan = 1;
+                       break;
+               case ALU_SRC_LDS_DIRECT_A:
+                       s << "LDS_A["; s.print_zw_hex(src->value.u, 8); s << "]";
+                       break;
+               case ALU_SRC_LDS_DIRECT_B:
+                       s << "LDS_B["; s.print_zw_hex(src->value.u, 8); s << "]";
+                       break;
                case ALU_SRC_PS:
                        s << "PS";
                        break;
@@ -363,6 +385,10 @@ void bc_dump::dump(alu_node& n) {
                }
        }
 
+       if (n.bc.lds_idx_offset) {
+               s << " IDX_OFFSET:" << n.bc.lds_idx_offset;
+       }
+
        sblog << s.str() << "\n";
 }