}
INLINE bool needToSplitAlu1(GenEncoder *p, GenRegister dst, GenRegister src) {
  // Decide whether a SIMD16 one-source ALU instruction must be emitted as
  // two SIMD8 halves: required when either operand is a vector of bytes.
  // A source with horizontal stride 0 reads the same element for every
  // channel (scalar broadcast), so no split is needed in that case.
  if (p->curr.execWidth != 16 || src.hstride == GEN_HORIZONTAL_STRIDE_0)
    return false;
  if (isVectorOfBytes(dst) == true) return true;
  if (isVectorOfBytes(src) == true) return true;
  return false;
}
INLINE bool needToSplitAlu2(GenEncoder *p, GenRegister dst, GenRegister src0, GenRegister src1) {
  // Decide whether a SIMD16 two-source ALU instruction must be emitted as
  // two SIMD8 halves: required when any operand is a vector of bytes.
  // When BOTH sources have horizontal stride 0 (scalar broadcast) no split
  // is needed; a single non-scalar source still forces the check below.
  if (p->curr.execWidth != 16 ||
      (src0.hstride == GEN_HORIZONTAL_STRIDE_0 &&
       src1.hstride == GEN_HORIZONTAL_STRIDE_0))
    return false;
  if (isVectorOfBytes(dst) == true) return true;
  if (isVectorOfBytes(src0) == true) return true;
  if (isVectorOfBytes(src1) == true) return true;
  // Fix: the original text fell off the end here — undefined behavior for a
  // non-void function. All remaining (non-byte-vector) cases need no split,
  // mirroring needToSplitAlu1.
  return false;
}
INLINE bool needToSplitCmp(GenEncoder *p, GenRegister src0, GenRegister src1) {
  // Decide whether a SIMD16 CMP must be emitted as two SIMD8 halves.
  // No split when both sources have horizontal stride 0 (scalar broadcast).
  if (p->curr.execWidth != 16 ||
      (src0.hstride == GEN_HORIZONTAL_STRIDE_0 &&
       src1.hstride == GEN_HORIZONTAL_STRIDE_0))
    return false;
  // Byte-vector operands always force a split.
  if (isVectorOfBytes(src0) == true) return true;
  if (isVectorOfBytes(src1) == true) return true;
  // 32-bit typed compares can be issued directly in SIMD16.
  if (src0.type == GEN_TYPE_D || src0.type == GEN_TYPE_UD || src0.type == GEN_TYPE_F)
    return false;
  // NOTE(review): the original text was truncated after the src0 type test
  // and had no return for the remaining cases (UB). The src1 test and the
  // final return are reconstructed by symmetry with needToSplitAlu2; the
  // final `return true` is forced — with `return false` the type tests above
  // would be dead code. Confirm polarity against the upstream encoder.
  if (src1.type == GEN_TYPE_D || src1.type == GEN_TYPE_UD || src1.type == GEN_TYPE_F)
    return false;
  return true;
}
-
void GenEncoder::setMessageDescriptor(GenNativeInstruction *inst, enum GenMessageTarget sfid,
unsigned msg_length, unsigned response_length,
bool header_present, bool end_of_thread)
insn->bits1.da1.dest_address_mode = dest.address_mode;
insn->bits1.da1.dest_reg_nr = dest.nr;
insn->bits1.da1.dest_subreg_nr = dest.subnr;
- if (dest.hstride == GEN_HORIZONTAL_STRIDE_0)
- dest.hstride = GEN_HORIZONTAL_STRIDE_1;
+ if (dest.hstride == GEN_HORIZONTAL_STRIDE_0) {
+ if (dest.type == GEN_TYPE_UB || dest.type == GEN_TYPE_B)
+ dest.hstride = GEN_HORIZONTAL_STRIDE_4;
+ else if (dest.type == GEN_TYPE_UW || dest.type == GEN_TYPE_W)
+ dest.hstride = GEN_HORIZONTAL_STRIDE_2;
+ else
+ dest.hstride = GEN_HORIZONTAL_STRIDE_1;
+ }
insn->bits1.da1.dest_horiz_stride = dest.hstride;
}
narrowDst = 0;
}
+ sel.push();
+ if (sel.isScalarReg(insn.getDst(0)) == true) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+
for(int i = 0; i < narrowNum; i++, index++) {
GenRegister narrowReg, wideReg;
if(narrowDst) {
} else
sel.MOV(xdst, xsrc);
}
+ sel.pop();
return true;
}
} else if (opcode == OP_F32TO16) {
GenRegister unpacked;
unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
- sel.F32TO16(unpacked, src);
+ sel.push();
+ if (sel.isScalarReg(insn.getSrc(0))) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+ sel.F32TO16(unpacked, src);
+ sel.pop();
sel.MOV(dst, unpacked);
} else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) {
GenRegister unpacked;
tmp.type = GEN_TYPE_D;
sel.CONVI64_TO_I(tmp, src);
sel.MOV(unpacked, tmp);
- } else
- sel.MOV(unpacked, src);
+ } else {
+ sel.push();
+ if (sel.isScalarReg(insn.getSrc(0))) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+ sel.MOV(unpacked, src);
+ sel.pop();
+ }
sel.MOV(dst, unpacked);
} else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) {
sel.CONVI64_TO_I(dst, src);
|| ctx.reservedSpillRegs != 0)
this->expireGRF(interval);
tick++;
+ // For some scalar byte register, it may be used as a destination register
+ // and the source is a scalar Dword. If that is the case, the byte register
+ // must get 4byte alignment register offset.
+ alignment = (alignment + 3) & ~3;
while ((grfOffset = ctx.allocate(size, alignment)) == 0) {
const bool success = this->expireGRF(interval);
if (success == false) {