From: Zhigang Gong
Date: Thu, 29 May 2014 01:26:24 +0000 (+0800)
Subject: GBE: fix uniform/scalar related bugs.
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1b8042620e32b05334f6bbbd87bde7ee4b4a8a03;p=contrib%2Fbeignet.git

GBE: fix uniform/scalar related bugs.

One major fix is that even if a register is a scalar, when we move a
scalar Dword to a scalar Byte we have to set the hstride to 4;
otherwise, it breaks the following register restriction:

  B. When the Execution Data Type is wider than the destination data
  type, the destination must be aligned as required by the wider
  execution data type and specify a HorzStride equal to the ratio in
  sizes of the two data types. For example, a mov with a D source and
  B destination must use a 4-byte aligned destination and a
  Dst.HorzStride of 4.

The following instruction may not take effect:

  mov.sat(1) g127.4<1>:B g126<0,1,0>:D

We have to change it to:

  mov.sat(1) g127.4<4>:B g126<0,1,0>:D

v2: keep the instruction selection stage unchanged; we fix this
restriction in setDst only.

Signed-off-by: Zhigang Gong
Reviewed-by: "Song, Ruiling"
---
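To make the restriction concrete before the patch itself: a minimal
standalone C++ sketch of the HorzStride ratio rule that setDst now
enforces. This is illustrative only, not Beignet code; the type-size
constants and the requiredHorzStride helper are invented for the example.

// Illustration only (not Beignet code): the required destination
// HorzStride is the ratio of the execution type size to the
// destination type size, per the PRM restriction quoted above.
#include <cassert>
#include <cstdio>

enum TypeSize { SIZE_B = 1, SIZE_W = 2, SIZE_D = 4 }; // byte sizes of B/W/D

// Hypothetical helper: compute Dst.HorzStride for a scalar destination
// written with a wider execution data type.
static int requiredHorzStride(int execTypeSize, int dstTypeSize) {
  assert(execTypeSize % dstTypeSize == 0);
  return execTypeSize / dstTypeSize; // D -> B yields 4, D -> W yields 2
}

int main() {
  // Matches the fixed instruction: mov.sat(1) g127.4<4>:B g126<0,1,0>:D
  printf("D -> B: HorzStride %d\n", requiredHorzStride(SIZE_D, SIZE_B)); // 4
  printf("D -> W: HorzStride %d\n", requiredHorzStride(SIZE_D, SIZE_W)); // 2
  printf("D -> D: HorzStride %d\n", requiredHorzStride(SIZE_D, SIZE_D)); // 1
  return 0;
}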
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 0091e81..ed2fd32 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -68,14 +68,17 @@ namespace gbe
   }
 
   INLINE bool needToSplitAlu1(GenEncoder *p, GenRegister dst, GenRegister src) {
-    if (p->curr.execWidth != 16) return false;
+    if (p->curr.execWidth != 16 || src.hstride == GEN_HORIZONTAL_STRIDE_0) return false;
     if (isVectorOfBytes(dst) == true) return true;
     if (isVectorOfBytes(src) == true) return true;
     return false;
   }
 
   INLINE bool needToSplitAlu2(GenEncoder *p, GenRegister dst, GenRegister src0, GenRegister src1) {
-    if (p->curr.execWidth != 16) return false;
+    if (p->curr.execWidth != 16 ||
+        (src0.hstride == GEN_HORIZONTAL_STRIDE_0 &&
+         src1.hstride == GEN_HORIZONTAL_STRIDE_0))
+      return false;
     if (isVectorOfBytes(dst) == true) return true;
     if (isVectorOfBytes(src0) == true) return true;
     if (isVectorOfBytes(src1) == true) return true;
@@ -83,7 +86,10 @@ namespace gbe
   }
 
   INLINE bool needToSplitCmp(GenEncoder *p, GenRegister src0, GenRegister src1) {
-    if (p->curr.execWidth != 16) return false;
+    if (p->curr.execWidth != 16 ||
+        (src0.hstride == GEN_HORIZONTAL_STRIDE_0 &&
+         src1.hstride == GEN_HORIZONTAL_STRIDE_0))
+      return false;
     if (isVectorOfBytes(src0) == true) return true;
     if (isVectorOfBytes(src1) == true) return true;
     if (src0.type == GEN_TYPE_D || src0.type == GEN_TYPE_UD || src0.type == GEN_TYPE_F)
@@ -93,7 +99,6 @@ namespace gbe
     return false;
   }
 
-
   void GenEncoder::setMessageDescriptor(GenNativeInstruction *inst, enum GenMessageTarget sfid,
                                         unsigned msg_length, unsigned response_length,
                                         bool header_present, bool end_of_thread)
@@ -268,8 +273,14 @@ namespace gbe
     insn->bits1.da1.dest_address_mode = dest.address_mode;
     insn->bits1.da1.dest_reg_nr = dest.nr;
     insn->bits1.da1.dest_subreg_nr = dest.subnr;
-    if (dest.hstride == GEN_HORIZONTAL_STRIDE_0)
-      dest.hstride = GEN_HORIZONTAL_STRIDE_1;
+    if (dest.hstride == GEN_HORIZONTAL_STRIDE_0) {
+      if (dest.type == GEN_TYPE_UB || dest.type == GEN_TYPE_B)
+        dest.hstride = GEN_HORIZONTAL_STRIDE_4;
+      else if (dest.type == GEN_TYPE_UW || dest.type == GEN_TYPE_W)
+        dest.hstride = GEN_HORIZONTAL_STRIDE_2;
+      else
+        dest.hstride = GEN_HORIZONTAL_STRIDE_1;
+    }
     insn->bits1.da1.dest_horiz_stride = dest.hstride;
   }
 
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index cf0af9d..19921d4 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3076,6 +3076,13 @@ namespace gbe
         narrowDst = 0;
       }
 
+      sel.push();
+      if (sel.isScalarReg(insn.getDst(0)) == true) {
+        sel.curr.execWidth = 1;
+        sel.curr.predicate = GEN_PREDICATE_NONE;
+        sel.curr.noMask = 1;
+      }
+
       for(int i = 0; i < narrowNum; i++, index++) {
         GenRegister narrowReg, wideReg;
         if(narrowDst) {
@@ -3120,6 +3127,7 @@ namespace gbe
         } else
           sel.MOV(xdst, xsrc);
       }
+      sel.pop();
       return true;
     }
 
@@ -3154,7 +3162,14 @@ namespace gbe
     } else if (opcode == OP_F32TO16) {
       GenRegister unpacked;
       unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
-      sel.F32TO16(unpacked, src);
+      sel.push();
+      if (sel.isScalarReg(insn.getSrc(0))) {
+        sel.curr.execWidth = 1;
+        sel.curr.predicate = GEN_PREDICATE_NONE;
+        sel.curr.noMask = 1;
+      }
+      sel.F32TO16(unpacked, src);
+      sel.pop();
       sel.MOV(dst, unpacked);
     } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) {
       GenRegister unpacked;
@@ -3172,8 +3187,16 @@ namespace gbe
         tmp.type = GEN_TYPE_D;
         sel.CONVI64_TO_I(tmp, src);
         sel.MOV(unpacked, tmp);
-      } else
-        sel.MOV(unpacked, src);
+      } else {
+        sel.push();
+        if (sel.isScalarReg(insn.getSrc(0))) {
+          sel.curr.execWidth = 1;
+          sel.curr.predicate = GEN_PREDICATE_NONE;
+          sel.curr.noMask = 1;
+        }
+        sel.MOV(unpacked, src);
+        sel.pop();
+      }
       sel.MOV(dst, unpacked);
     } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) {
       sel.CONVI64_TO_I(dst, src);
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index f642c2e..3d8b0b3 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -941,6 +941,10 @@ namespace gbe
           || ctx.reservedSpillRegs != 0)
         this->expireGRF(interval);
       tick++;
+      // A scalar byte register may be used as a destination register while
+      // the source is a scalar Dword. In that case, the byte register must
+      // get a 4-byte-aligned register offset.
+      alignment = (alignment + 3) & ~3;
       while ((grfOffset = ctx.allocate(size, alignment)) == 0) {
         const bool success = this->expireGRF(interval);
         if (success == false) {
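As a footnote to the gen_reg_allocation.cpp hunk: the added line rounds
every alignment up to the next multiple of 4 so a scalar byte destination
can satisfy the 4-byte alignment the PRM requires. A standalone sketch of
that rounding follows; the alignTo4 name is invented for illustration,
while the real code inlines the expression.

// Illustration only: round an alignment up to a multiple of 4, exactly
// the (alignment + 3) & ~3 expression added in gen_reg_allocation.cpp.
#include <cstdint>
#include <cstdio>
#include <initializer_list>

static uint32_t alignTo4(uint32_t alignment) {
  return (alignment + 3) & ~uint32_t(3); // no-op when already 4-byte aligned
}

int main() {
  for (uint32_t a : {1u, 2u, 3u, 4u, 5u})
    printf("alignment %u -> %u\n", a, alignTo4(a)); // 1,2,3,4 -> 4; 5 -> 8
  return 0;
}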