Because the low and high 32 bits of a long are stored separately, when we
do a bitcast such as int64 --> int16, the horizontal stride of the int64's
low/high half should be set to 2 instead of 4.
This fixes a regression in the OpenCV test
Imgproc/Threshold.Mat/40, where GetParam() = (16SC1, 0, 0, false).
Signed-off-by: Ruiling Song <ruiling.song@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
wideReg = sel.selReg(insn.getDst(index/multiple), narrowType);
narrowReg = sel.selReg(insn.getSrc(i), narrowType); //retype to narrow type
}
- if(wideReg.hstride != GEN_VERTICAL_STRIDE_0) {
+ if(wideReg.hstride != GEN_HORIZONTAL_STRIDE_0) {
if(multiple == 2) {
wideReg = sel.unpacked_uw(wideReg.reg());
wideReg = GenRegister::retype(wideReg, getGenType(narrowType));
wideReg.subphysical = 1;
}
if(isInt64) {
+ if(wideReg.hstride != GEN_HORIZONTAL_STRIDE_0) {
+ // a long is stored as separate low and high halves, so the hstride has to be divided by 2
+ if (wideReg.hstride == GEN_HORIZONTAL_STRIDE_2)
+ wideReg.hstride = GEN_HORIZONTAL_STRIDE_1;
+ else if (wideReg.hstride == GEN_HORIZONTAL_STRIDE_4)
+ wideReg.hstride = GEN_HORIZONTAL_STRIDE_2;
+ else
+ GBE_ASSERT(0);
+ }
// offset to next half
wideReg.subphysical = 1;
if(i >= multiple/2)