p->push();
p->curr.predicate = GEN_PREDICATE_NONE;
p->curr.noMask = 1;
- p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, src1);
+ p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0,
+ GenRegister::h2(GenRegister::retype(src1, GEN_TYPE_UW)));
p->curr.accWrEnable = 1;
p->MACH(tmp, src0, src1);
p->pop();
p->push();
p->curr.execWidth = 8;
for(int i = 0; i < execWidth; i += 8) {
- p->MUL(acc, src0, src1);
+ p->MUL(acc, src0, GenRegister::h2(GenRegister::retype(src1, GEN_TYPE_UW)));
p->curr.accWrEnable = 1;
p->MACH(high, src0, src1);
p->curr.accWrEnable = 0;
/*! should add per thread offset to the local memory address when load/store/atomic */
bool needPatchSLMAddr() const { return patchSLMAddr; }
void setPatchSLMAddr(bool b) { patchSLMAddr = b; }
+ bool has32X32Mul() const { return bHas32X32Mul; }
+ void setHas32X32Mul(bool b) { bHas32X32Mul = b; }
/*! indicate whether a register is a scalar/uniform register. */
INLINE bool isScalarReg(const ir::Register &reg) const {
const ir::RegisterData &regData = getRegisterData(reg);
/*! Auxiliary label for if/endif. */
uint16_t currAuxLabel;
bool patchSLMAddr;
+ bool bHas32X32Mul;
INLINE ir::LabelIndex newAuxLabel()
{
currAuxLabel++;
ctx(ctx), block(NULL),
curr(ctx.getSimdWidth()), file(ctx.getFunction().getRegisterFile()),
maxInsnNum(ctx.getFunction().getLargestBlockSize()), dagPool(maxInsnNum),
- stateNum(0), vectorNum(0), bwdCodeGeneration(false), currAuxLabel(ctx.getFunction().labelNum()), patchSLMAddr(false)
+ stateNum(0), vectorNum(0), bwdCodeGeneration(false), currAuxLabel(ctx.getFunction().labelNum()),
+ patchSLMAddr(false), bHas32X32Mul(false)
{
const ir::Function &fn = ctx.getFunction();
this->regNum = fn.regNum();
this->opaque->setPatchSLMAddr(true);
}
+ Selection8::Selection8(GenContext &ctx) : Selection(ctx) {
+ this->opaque->setPatchSLMAddr(true);
+ this->opaque->setHas32X32Mul(true);
+ }
+
void Selection::Opaque::TYPED_WRITE(GenRegister *msgs, uint32_t msgNum,
uint32_t bti, bool is3D) {
uint32_t elemID = 0;
using namespace ir;
const ir::BinaryInstruction &insn = cast<ir::BinaryInstruction>(dag.insn);
const Type type = insn.getType();
- if (type == TYPE_U32 || type == TYPE_S32) {
+ if (type != TYPE_U32 && type != TYPE_S32)
+ return false;
+
+ GenRegister dst = sel.selReg(insn.getDst(0), type);
+ GenRegister src0 = sel.selReg(insn.getSrc(0), type);
+ GenRegister src1 = sel.selReg(insn.getSrc(1), type);
+ if (sel.has32X32Mul()) {
+ sel.MUL(dst, src0, src1);
+ } else {
sel.push();
- if (sel.isScalarReg(insn.getDst(0)) == true) {
- sel.curr.execWidth = 1;
- sel.curr.predicate = GEN_PREDICATE_NONE;
- sel.curr.noMask = 1;
- }
- const uint32_t simdWidth = sel.curr.execWidth;
+ if (sel.isScalarReg(insn.getDst(0)) == true) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
- GenRegister dst = sel.selReg(insn.getDst(0), type);
- GenRegister src0 = sel.selReg(insn.getSrc(0), type);
- GenRegister src1 = sel.selReg(insn.getSrc(1), type);
+ const int simdWidth = sel.curr.execWidth;
// Either left part of the 16-wide register or just a simd 8 register
dst = GenRegister::retype(dst, GEN_TYPE_D);
} else
sel.MOV(GenRegister::retype(GenRegister::next(dst), GEN_TYPE_F), GenRegister::acc());
}
-
sel.pop();
- // All children are marked as root
- markAllChildren(dag);
- return true;
- } else
- return false;
+ }
+ // All children are marked as root
+ markAllChildren(dag);
+ return true;
}
};