const GenRegister data = ra->genReg(insn.src(1));
const uint32_t bti = insn.extra.function;
p->MOV(src, addr);
- p->WRITE64(src, data, bti, elemNum);
+ p->WRITE64(src, data, bti, elemNum, isScalarReg(data.reg()));
}
void GenContext::emitUntypedWriteInstruction(const SelectionInstruction &insn) {
pop();
}
- void GenEncoder::WRITE64(GenRegister msg, GenRegister data, uint32_t bti, uint32_t elemNum) {
+ void GenEncoder::WRITE64(GenRegister msg, GenRegister data, uint32_t bti, uint32_t elemNum, bool is_scalar) {
GenRegister data32 = GenRegister::retype(data, GEN_TYPE_UD);
+ GenRegister unpacked;
msg = GenRegister::retype(msg, GEN_TYPE_UD);
int originSimdWidth = curr.execWidth;
int originPredicate = curr.predicate;
curr.predicate = GEN_PREDICATE_NONE;
curr.noMask = GEN_MASK_DISABLE;
curr.execWidth = 8;
- MOV(GenRegister::suboffset(msg, originSimdWidth), GenRegister::unpacked_ud(data32.nr, data32.subnr + half));
+ if (is_scalar) {
+ unpacked = data32;
+ unpacked.subnr += half * 4;
+ } else
+ unpacked = GenRegister::unpacked_ud(data32.nr, data32.subnr + half);
+ MOV(GenRegister::suboffset(msg, originSimdWidth), unpacked);
if (originSimdWidth == 16) {
- MOV(GenRegister::suboffset(msg, originSimdWidth + 8), GenRegister::unpacked_ud(data32.nr + 2, data32.subnr + half));
+ if (is_scalar) {
+ unpacked = data32;
+ unpacked.subnr += half * 4;
+ } else
+ unpacked = GenRegister::unpacked_ud(data32.nr + 2, data32.subnr + half);
+ MOV(GenRegister::suboffset(msg, originSimdWidth + 8), unpacked);
curr.execWidth = 16;
}
if (half == 1)
/*! Read 64-bit float/int arrays */
void READ64(GenRegister dst, GenRegister tmp, GenRegister addr, GenRegister src, uint32_t bti, uint32_t elemNum);
/*! Write 64-bit float/int arrays */
- void WRITE64(GenRegister src, GenRegister data, uint32_t bti, uint32_t elemNum);
+ void WRITE64(GenRegister src, GenRegister data, uint32_t bti, uint32_t elemNum, bool is_scalar);
/*! Untyped read (up to 4 channels) */
void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
/*! Untyped write (up to 4 channels) */