From c722ad737907b58efa04c8ef0bd330c5adee4043 Mon Sep 17 00:00:00 2001 From: Alok Hota Date: Thu, 30 Aug 2018 17:45:06 -0500 Subject: [PATCH] swr/rast: Unaligned and translations in gathers - added graphics address translation in odd gathers - added support for unaligned gathers in fetch shader - changed how 2+ GB offsets are handled to make them compatible with unaligned offsets --- .../drivers/swr/rasterizer/jitter/fetch_jit.cpp | 56 ++++++++++++++-------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index d294a67..6feb1a7 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -368,7 +368,7 @@ void FetchJit::UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[ // gather SIMD full pixels per lane then shift/mask to move each component to their // own vector void FetchJit::CreateGatherOddFormats( - SWR_FORMAT format, Value* pMask, Value* pBase, Value* pOffsets, Value* pResult[4]) + SWR_FORMAT format, Value* pMask, Value* xpBase, Value* pOffsets, Value* pResult[4]) { const SWR_FORMAT_INFO& info = GetFormatInfo(format); @@ -378,7 +378,7 @@ void FetchJit::CreateGatherOddFormats( Value* pGather; if (info.bpp == 32) { - pGather = GATHERDD(VIMMED1(0), pBase, pOffsets, pMask); + pGather = GATHERDD(VIMMED1(0), xpBase, pOffsets, pMask); } else { @@ -386,29 +386,40 @@ void FetchJit::CreateGatherOddFormats( Value* pMem = ALLOCA(mSimdInt32Ty); STORE(VIMMED1(0u), pMem); - pBase = BITCAST(pBase, PointerType::get(mInt8Ty, 0)); - Value* pDstMem = BITCAST(pMem, mInt32PtrTy); + Value* pDstMem = POINTER_CAST(pMem, mInt32PtrTy); for (uint32_t lane = 0; lane < mVWidth; ++lane) { // Get index Value* index = VEXTRACT(pOffsets, C(lane)); Value* mask = VEXTRACT(pMask, C(lane)); + + // use branch around load based on mask + // Needed to avoid page-faults on unmasked lanes + BasicBlock* pCurrentBB = IRB()->GetInsertBlock(); + BasicBlock* pMaskedLoadBlock = + BasicBlock::Create(JM()->mContext, "MaskedLaneLoad", pCurrentBB->getParent()); + BasicBlock* pEndLoadBB = BasicBlock::Create(JM()->mContext, "AfterMaskedLoad", pCurrentBB->getParent()); + + COND_BR(mask, pMaskedLoadBlock, pEndLoadBB); + + JM()->mBuilder.SetInsertPoint(pMaskedLoadBlock); + switch (info.bpp) { case 8: { Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt8Ty, 0)); - Value* pSrc = BITCAST(GEP(pBase, index), PointerType::get(mInt8Ty, 0)); - STORE(LOAD(SELECT(mask, pSrc, pDst)), pDst); + Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType())); + STORE(LOAD(xpSrc, "", mInt8PtrTy, GFX_MEM_CLIENT_FETCH), pDst); break; } case 16: { Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0)); - Value* pSrc = BITCAST(GEP(pBase, index), PointerType::get(mInt16Ty, 0)); - STORE(LOAD(SELECT(mask, pSrc, pDst)), pDst); + Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType())); + STORE(LOAD(xpSrc, "", mInt16PtrTy, GFX_MEM_CLIENT_FETCH), pDst); break; } break; @@ -417,13 +428,13 @@ void FetchJit::CreateGatherOddFormats( { // First 16-bits of data Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0)); - Value* pSrc = BITCAST(GEP(pBase, index), PointerType::get(mInt16Ty, 0)); - STORE(LOAD(SELECT(mask, pSrc, pDst)), pDst); + Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType())); + STORE(LOAD(xpSrc, "", mInt16PtrTy, GFX_MEM_CLIENT_FETCH), pDst); // Last 8-bits of data pDst = BITCAST(GEP(pDst, C(1)), PointerType::get(mInt8Ty, 0)); - pSrc = BITCAST(GEP(pSrc, C(1)), PointerType::get(mInt8Ty, 0)); - STORE(LOAD(SELECT(mask, pSrc, pDst)), pDst); + xpSrc = ADD(xpSrc, C(2)); + STORE(LOAD(xpSrc, "", mInt8PtrTy, GFX_MEM_CLIENT_FETCH), pDst); break; } @@ -431,6 +442,9 @@ void FetchJit::CreateGatherOddFormats( SWR_INVALID("Shouldn't have BPP = %d now", info.bpp); break; } + + BR(pEndLoadBB); + JM()->mBuilder.SetInsertPoint(pEndLoadBB); } pGather = LOAD(pMem); @@ -616,7 +630,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState, // do 64bit address offset calculations. // calculate byte offset to the start of the VB - Value* baseOffset = MUL(Z_EXT(startOffset, mInt64Ty), Z_EXT(stride, mInt64Ty)); + Value* baseOffset = MUL(Z_EXT(startOffset, mInt64Ty), Z_EXT(stride, mInt64Ty)); // VGATHER* takes an *i8 src pointer so that's what stream is Value* pStreamBaseGFX = ADD(stream, baseOffset); @@ -781,17 +795,17 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState, { // Gather a SIMD of vertices // APIs allow a 4GB range for offsets - // However, GATHERPS uses signed 32-bit offsets, so only a 2GB range :( - // But, we know that elements must be aligned for FETCH. :) - // Right shift the offset by a bit and then scale by 2 to remove the - // sign extension. - Value* vShiftedOffsets = LSHR(vOffsets, 1); + // However, GATHERPS uses signed 32-bit offsets, so +/- 2GB range :( + // Add 2GB to the base pointer and 2GB to the offsets. This makes + // "negative" (large) offsets into positive offsets and small offsets + // into negative offsets. + Value* vNewOffsets = ADD(vOffsets, VIMMED1(0x80000000)); vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, - pStreamBaseGFX, - vShiftedOffsets, + ADD(pStreamBaseGFX, C((uintptr_t)0x80000000U)), + vNewOffsets, vGatherMask, - 2, + 1, GFX_MEM_CLIENT_FETCH); } else -- 2.7.4