From: Ruiling Song Date: Wed, 23 Apr 2014 02:56:50 +0000 (+0800) Subject: GBE: Optimize byte gather read using untyped read. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3af12cc7f18c345afa3219343310d0028a2dc17f;p=contrib%2Fbeignet.git GBE: Optimize byte gather read using untyped read. Untyped read seems to perform better than byte gather read. Performance on some OpenCV tests doubled after this patch. Signed-off-by: Ruiling Song --- diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index bcbf115..8c7ac09 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -2594,19 +2594,22 @@ namespace gbe } else { GBE_ASSERT(insn.getValueNum() == 1); const GenRegister value = sel.selReg(insn.getValue(0)); - // We need a temporary register if we read bytes or words - Register dst = Register(value.value.reg); - if (elemSize == GEN_BYTE_SCATTER_WORD || - elemSize == GEN_BYTE_SCATTER_BYTE) { - dst = sel.reg(FAMILY_DWORD); - sel.BYTE_GATHER(GenRegister::fxgrf(simdWidth, dst), address, elemSize, bti); - } - - // Repack bytes or words using a converting mov instruction + GBE_ASSERT(elemSize == GEN_BYTE_SCATTER_WORD || elemSize == GEN_BYTE_SCATTER_BYTE); + + Register tmpReg = sel.reg(FAMILY_DWORD); + GenRegister tmpAddr = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD)); + GenRegister tmpData = GenRegister::udxgrf(simdWidth, tmpReg); + // Get dword aligned addr + sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0xfffffffc)); + sel.UNTYPED_READ(tmpAddr, &tmpData, 1, bti); + // Get the remaining offset from aligned addr + sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3)); + sel.SHL(tmpAddr, tmpAddr, GenRegister::immud(0x3)); + sel.SHR(tmpData, tmpData, tmpAddr); if (elemSize == GEN_BYTE_SCATTER_WORD) - sel.MOV(GenRegister::retype(value, GEN_TYPE_UW), GenRegister::unpacked_uw(dst)); + 
sel.MOV(GenRegister::retype(value, GEN_TYPE_UW), GenRegister::unpacked_uw(tmpReg)); else if (elemSize == GEN_BYTE_SCATTER_BYTE) - sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(dst)); + sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(tmpReg)); } }