From 0ff57446e3786243c6d752c91be2108595f2663e Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Wed, 17 Aug 2016 14:30:32 -0500 Subject: [PATCH] swr: [rasterizer core] only use Viewport/Scissors during SwrDraw* operations Add explicit rects for: - SwrClearRenderTarget - SwrDiscardRect - SwrInvalidateTiles - SwrStoreTiles Signed-off-by: Tim Rowley --- src/gallium/drivers/swr/rasterizer/core/api.cpp | 94 ++++--- src/gallium/drivers/swr/rasterizer/core/api.h | 119 +++++++-- .../drivers/swr/rasterizer/core/backend.cpp | 69 ++--- src/gallium/drivers/swr/rasterizer/core/context.h | 9 +- .../drivers/swr/rasterizer/core/frontend.cpp | 288 +++++++++------------ src/gallium/drivers/swr/rasterizer/core/frontend.h | 10 +- .../drivers/swr/rasterizer/core/rasterizer.cpp | 122 ++++----- src/gallium/drivers/swr/rasterizer/core/utils.h | 50 +--- src/gallium/drivers/swr/swr_clear.cpp | 12 +- src/gallium/drivers/swr/swr_context.h | 1 + src/gallium/drivers/swr/swr_draw.cpp | 32 +-- src/gallium/drivers/swr/swr_state.cpp | 9 +- 12 files changed, 400 insertions(+), 415 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index e447bf6f..d53a6cbe 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -46,6 +46,8 @@ #include "common/simdintrin.h" #include "common/os.h" +static const SWR_RECT g_MaxScissorRect = { 0, 0, KNOB_MAX_SCISSOR_X, KNOB_MAX_SCISSOR_Y }; + void SetupDefaultState(SWR_CONTEXT *pContext); static INLINE SWR_CONTEXT* GetContext(HANDLE hContext) @@ -713,56 +715,46 @@ void SwrSetViewports( void SwrSetScissorRects( HANDLE hContext, uint32_t numScissors, - const BBOX* pScissors) + const SWR_RECT* pScissors) { SWR_ASSERT(numScissors <= KNOB_NUM_VIEWPORTS_SCISSORS, "Invalid number of scissor rects."); API_STATE* pState = GetDrawState(GetContext(hContext)); - memcpy(&pState->scissorRects[0], pScissors, numScissors * sizeof(BBOX)); + memcpy(&pState->scissorRects[0], pScissors, numScissors * sizeof(pScissors[0])); }; void SetupMacroTileScissors(DRAW_CONTEXT *pDC) { API_STATE *pState = &pDC->pState->state; - uint32_t left, right, top, bottom; // Set up scissor dimensions based on scissor or viewport if (pState->rastState.scissorEnable) { - // scissor rect right/bottom edge are exclusive, core expects scissor dimensions to be inclusive, so subtract one pixel from right/bottom edges - left = pState->scissorRects[0].left; - right = pState->scissorRects[0].right; - top = pState->scissorRects[0].top; - bottom = pState->scissorRects[0].bottom; + pState->scissorInFixedPoint = pState->scissorRects[0]; } else { // the vp width and height must be added to origin un-rounded then the result round to -inf. // The cast to int works for rounding assuming all [left, right, top, bottom] are positive. - left = (int32_t)pState->vp[0].x; - right = (int32_t)(pState->vp[0].x + pState->vp[0].width); - top = (int32_t)pState->vp[0].y; - bottom = (int32_t)(pState->vp[0].y + pState->vp[0].height); + pState->scissorInFixedPoint.xmin = (int32_t)pState->vp[0].x; + pState->scissorInFixedPoint.xmax = (int32_t)(pState->vp[0].x + pState->vp[0].width); + pState->scissorInFixedPoint.ymin = (int32_t)pState->vp[0].y; + pState->scissorInFixedPoint.ymax = (int32_t)(pState->vp[0].y + pState->vp[0].height); } - right = std::min(right, KNOB_MAX_SCISSOR_X); - bottom = std::min(bottom, KNOB_MAX_SCISSOR_Y); + // Clamp to max rect + pState->scissorInFixedPoint &= g_MaxScissorRect; - if (left > KNOB_MAX_SCISSOR_X || top > KNOB_MAX_SCISSOR_Y) - { - pState->scissorInFixedPoint.left = 0; - pState->scissorInFixedPoint.right = 0; - pState->scissorInFixedPoint.top = 0; - pState->scissorInFixedPoint.bottom = 0; - } - else - { - pState->scissorInFixedPoint.left = left * FIXED_POINT_SCALE; - pState->scissorInFixedPoint.right = right * FIXED_POINT_SCALE - 1; - pState->scissorInFixedPoint.top = top * FIXED_POINT_SCALE; - pState->scissorInFixedPoint.bottom = bottom * FIXED_POINT_SCALE - 1; - } + // Scale to fixed point + pState->scissorInFixedPoint.xmin *= FIXED_POINT_SCALE; + pState->scissorInFixedPoint.xmax *= FIXED_POINT_SCALE; + pState->scissorInFixedPoint.ymin *= FIXED_POINT_SCALE; + pState->scissorInFixedPoint.ymax *= FIXED_POINT_SCALE; + + // Make scissor inclusive + pState->scissorInFixedPoint.xmax -= 1; + pState->scissorInFixedPoint.ymax -= 1; } // templated backend function tables @@ -1303,9 +1295,12 @@ void SwrDrawIndexedInstanced( /// @brief SwrInvalidateTiles /// @param hContext - Handle passed back from SwrCreateContext /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate. -void SwrInvalidateTiles( +/// @param invalidateRect - The pixel-coordinate rectangle to invalidate. This will be expanded to +/// be hottile size-aligned. +void SWR_API SwrInvalidateTiles( HANDLE hContext, - uint32_t attachmentMask) + uint32_t attachmentMask, + const SWR_RECT& invalidateRect) { if (KNOB_TOSS_DRAW) { @@ -1318,7 +1313,8 @@ void SwrInvalidateTiles( pDC->FeWork.type = DISCARDINVALIDATETILES; pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles; pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask; - memset(&pDC->FeWork.desc.discardInvalidateTiles.rect, 0, sizeof(SWR_RECT)); + pDC->FeWork.desc.discardInvalidateTiles.rect = invalidateRect; + pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect; pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_INVALID; pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = false; pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = false; @@ -1331,11 +1327,12 @@ void SwrInvalidateTiles( /// @brief SwrDiscardRect /// @param hContext - Handle passed back from SwrCreateContext /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard. -/// @param rect - if rect is all zeros, the entire attachment surface will be discarded -void SwrDiscardRect( +/// @param rect - The pixel-coordinate rectangle to discard. Only fully-covered hottiles will be +/// discarded. +void SWR_API SwrDiscardRect( HANDLE hContext, uint32_t attachmentMask, - SWR_RECT rect) + const SWR_RECT& rect) { if (KNOB_TOSS_DRAW) { @@ -1350,6 +1347,7 @@ void SwrDiscardRect( pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles; pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask; pDC->FeWork.desc.discardInvalidateTiles.rect = rect; + pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect; pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_RESOLVED; pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = true; pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = true; @@ -1398,10 +1396,11 @@ void SwrDispatch( // Deswizzles, converts and stores current contents of the hot tiles to surface // described by pState -void SwrStoreTiles( +void SWR_API SwrStoreTiles( HANDLE hContext, SWR_RENDERTARGET_ATTACHMENT attachment, - SWR_TILE_STATE postStoreTileState) + SWR_TILE_STATE postStoreTileState, + const SWR_RECT& storeRect) { if (KNOB_TOSS_DRAW) { @@ -1413,12 +1412,12 @@ void SwrStoreTiles( SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); - SetupMacroTileScissors(pDC); - pDC->FeWork.type = STORETILES; pDC->FeWork.pfnWork = ProcessStoreTiles; pDC->FeWork.desc.storeTiles.attachment = attachment; pDC->FeWork.desc.storeTiles.postStoreTileState = postStoreTileState; + pDC->FeWork.desc.storeTiles.rect = storeRect; + pDC->FeWork.desc.storeTiles.rect &= g_MaxScissorRect; //enqueue QueueDraw(pContext); @@ -1426,12 +1425,21 @@ void SwrStoreTiles( RDTSC_STOP(APIStoreTiles, 0, 0); } -void SwrClearRenderTarget( +////////////////////////////////////////////////////////////////////////// +/// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil +/// @param hContext - Handle passed back from SwrCreateContext +/// @param clearMask - combination of SWR_CLEAR_COLOR / SWR_CLEAR_DEPTH / SWR_CLEAR_STENCIL flags (or SWR_CLEAR_NONE) +/// @param clearColor - color use for clearing render targets +/// @param z - depth value use for clearing depth buffer +/// @param stencil - stencil value used for clearing stencil buffer +/// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers +void SWR_API SwrClearRenderTarget( HANDLE hContext, uint32_t clearMask, const float clearColor[4], float z, - uint8_t stencil) + uint8_t stencil, + const SWR_RECT& clearRect) { if (KNOB_TOSS_DRAW) { @@ -1441,16 +1449,16 @@ void SwrClearRenderTarget( RDTSC_START(APIClearRenderTarget); SWR_CONTEXT *pContext = GetContext(hContext); - DRAW_CONTEXT* pDC = GetDrawContext(pContext); - SetupMacroTileScissors(pDC); - CLEAR_FLAGS flags; + flags.bits = 0; flags.mask = clearMask; pDC->FeWork.type = CLEAR; pDC->FeWork.pfnWork = ProcessClear; + pDC->FeWork.desc.clear.rect = clearRect; + pDC->FeWork.desc.clear.rect &= g_MaxScissorRect; pDC->FeWork.desc.clear.flags = flags; pDC->FeWork.desc.clear.clearDepth = z; pDC->FeWork.desc.clear.clearRTColor[0] = clearColor[0]; diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h index ed18fe0..9ca235d8 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.h +++ b/src/gallium/drivers/swr/rasterizer/core/api.h @@ -32,18 +32,82 @@ #include "common/os.h" #include -#include +#include #include "common/simdintrin.h" #include "common/formats.h" -#include "core/utils.h" #include "core/state.h" -///@todo place all the API functions into the 'swr' namespace. - typedef void(SWR_API *PFN_CALLBACK_FUNC)(uint64_t data, uint64_t data2, uint64_t data3); ////////////////////////////////////////////////////////////////////////// +/// @brief Rectangle structure +struct SWR_RECT +{ + int32_t xmin; ///< inclusive + int32_t ymin; ///< inclusive + int32_t xmax; ///< exclusive + int32_t ymax; ///< exclusive + + bool operator == (const SWR_RECT& rhs) + { + return (this->ymin == rhs.ymin && + this->ymax == rhs.ymax && + this->xmin == rhs.xmin && + this->xmax == rhs.xmax); + } + + bool operator != (const SWR_RECT& rhs) + { + return !(*this == rhs); + } + + SWR_RECT& Intersect(const SWR_RECT& other) + { + this->xmin = std::max(this->xmin, other.xmin); + this->ymin = std::max(this->ymin, other.ymin); + this->xmax = std::min(this->xmax, other.xmax); + this->ymax = std::min(this->ymax, other.ymax); + + if (xmax - xmin < 0 || + ymax - ymin < 0) + { + // Zero area + ymin = ymax = xmin = xmax = 0; + } + + return *this; + } + SWR_RECT& operator &= (const SWR_RECT& other) + { + return Intersect(other); + } + + SWR_RECT& Union(const SWR_RECT& other) + { + this->xmin = std::min(this->xmin, other.xmin); + this->ymin = std::min(this->ymin, other.ymin); + this->xmax = std::max(this->xmax, other.xmax); + this->ymax = std::max(this->ymax, other.ymax); + + return *this; + } + + SWR_RECT& operator |= (const SWR_RECT& other) + { + return Union(other); + } + + void Translate(int32_t x, int32_t y) + { + xmin += x; + ymin += y; + xmax += x; + ymax += y; + } +}; + +////////////////////////////////////////////////////////////////////////// /// @brief Function signature for load hot tiles /// @param hPrivateContext - handle to private data /// @param dstFormat - format of the hot tile @@ -105,6 +169,10 @@ typedef void(SWR_API *PFN_UPDATE_STATS)(HANDLE hPrivateContext, typedef void(SWR_API *PFN_UPDATE_STATS_FE)(HANDLE hPrivateContext, const SWR_STATS_FE* pStats); +////////////////////////////////////////////////////////////////////////// +/// BucketManager +/// Forward Declaration (see rdtsc_buckets.h for full definition) +///////////////////////////////////////////////////////////////////////// class BucketManager; ////////////////////////////////////////////////////////////////////////// @@ -150,17 +218,6 @@ struct SWR_CREATECONTEXT_INFO }; ////////////////////////////////////////////////////////////////////////// -/// SWR_RECT -///////////////////////////////////////////////////////////////////////// -struct SWR_RECT -{ - uint32_t left; - uint32_t right; - uint32_t top; - uint32_t bottom; -}; - -////////////////////////////////////////////////////////////////////////// /// @brief Create SWR Context. /// @param pCreateInfo - pointer to creation info. HANDLE SWR_API SwrCreateContext( @@ -445,19 +502,23 @@ void SWR_API SwrDrawIndexedInstanced( /// @brief SwrInvalidateTiles /// @param hContext - Handle passed back from SwrCreateContext /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate. +/// @param invalidateRect - The pixel-coordinate rectangle to invalidate. This will be expanded to +/// be hottile size-aligned. void SWR_API SwrInvalidateTiles( HANDLE hContext, - uint32_t attachmentMask); + uint32_t attachmentMask, + const SWR_RECT& invalidateRect); ////////////////////////////////////////////////////////////////////////// /// @brief SwrDiscardRect /// @param hContext - Handle passed back from SwrCreateContext /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard. -/// @param rect - if rect is all zeros, the entire attachment surface will be discarded +/// @param rect - The pixel-coordinate rectangle to discard. Only fully-covered hottiles will be +/// discarded. void SWR_API SwrDiscardRect( HANDLE hContext, uint32_t attachmentMask, - SWR_RECT rect); + const SWR_RECT& rect); ////////////////////////////////////////////////////////////////////////// /// @brief SwrDispatch @@ -483,15 +544,30 @@ enum SWR_TILE_STATE void SWR_API SwrStoreTiles( HANDLE hContext, SWR_RENDERTARGET_ATTACHMENT attachment, - SWR_TILE_STATE postStoreTileState); + SWR_TILE_STATE postStoreTileState, + const SWR_RECT& storeRect); + +////////////////////////////////////////////////////////////////////////// +/// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil +/// @param hContext - Handle passed back from SwrCreateContext +/// @param clearMask - combination of SWR_CLEAR_COLOR / SWR_CLEAR_DEPTH / SWR_CLEAR_STENCIL flags (or SWR_CLEAR_NONE) +/// @param clearColor - color use for clearing render targets +/// @param z - depth value use for clearing depth buffer +/// @param stencil - stencil value used for clearing stencil buffer +/// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers void SWR_API SwrClearRenderTarget( HANDLE hContext, uint32_t clearMask, const float clearColor[4], float z, - uint8_t stencil); + uint8_t stencil, + const SWR_RECT& clearRect); +////////////////////////////////////////////////////////////////////////// +/// @brief SwrSetRastyState +/// @param hContext - Handle passed back from SwrCreateContext +/// @param pRastState - New SWR_RASTSTATE used for SwrDraw* commands void SWR_API SwrSetRastState( HANDLE hContext, const SWR_RASTSTATE *pRastState); @@ -516,7 +592,7 @@ void SWR_API SwrSetViewports( void SWR_API SwrSetScissorRects( HANDLE hContext, uint32_t numScissors, - const BBOX* pScissors); + const SWR_RECT* pScissors); ////////////////////////////////////////////////////////////////////////// /// @brief Returns a pointer to the private context state for the current @@ -555,4 +631,5 @@ void SWR_API SwrEnableStats( /// @param hContext - Handle passed back from SwrCreateContext void SWR_API SwrEndFrame( HANDLE hContext); + #endif//__SWR_API_H__ diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp index dff86b3..1e4dca2 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp @@ -37,7 +37,7 @@ #include -typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, DWORD[4]); +typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, DWORD[4], const SWR_RECT& rect); static PFN_CLEAR_TILES sClearTilesTable[NUM_SWR_FORMATS]; ////////////////////////////////////////////////////////////////////////// @@ -88,7 +88,7 @@ void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi template void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value) { - auto lambda = [&](int comp) + auto lambda = [&](int32_t comp) { FormatTraits::storeSOA(comp, pTileBuffer, value.v[comp]); pTileBuffer += (KNOB_SIMD_WIDTH * FormatTraits::GetBPC(comp) / 8); @@ -102,7 +102,7 @@ void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value) } template -INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, DWORD clear[4]) +INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, DWORD clear[4], const SWR_RECT& rect) { // convert clear color to hottile format // clear color is in RGBA float/uint32 @@ -122,32 +122,33 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui uint32_t tileX, tileY; MacroTileMgr::getTileIndices(macroTile, tileX, tileY); - const API_STATE& state = GetApiState(pDC); - - int top = KNOB_MACROTILE_Y_DIM_FIXED * tileY; - int bottom = top + KNOB_MACROTILE_Y_DIM_FIXED - 1; - int left = KNOB_MACROTILE_X_DIM_FIXED * tileX; - int right = left + KNOB_MACROTILE_X_DIM_FIXED - 1; - // intersect with scissor - top = std::max(top, state.scissorInFixedPoint.top); - left = std::max(left, state.scissorInFixedPoint.left); - bottom = std::min(bottom, state.scissorInFixedPoint.bottom); - right = std::min(right, state.scissorInFixedPoint.right); + // Init to full macrotile + SWR_RECT clearTile = + { + KNOB_MACROTILE_X_DIM * int32_t(tileX), + KNOB_MACROTILE_Y_DIM * int32_t(tileY), + KNOB_MACROTILE_X_DIM * int32_t(tileX + 1), + KNOB_MACROTILE_Y_DIM * int32_t(tileY + 1), + }; + + // intersect with clear rect + clearTile &= rect; // translate to local hottile origin - top -= KNOB_MACROTILE_Y_DIM_FIXED * tileY; - bottom -= KNOB_MACROTILE_Y_DIM_FIXED * tileY; - left -= KNOB_MACROTILE_X_DIM_FIXED * tileX; - right -= KNOB_MACROTILE_X_DIM_FIXED * tileX; + clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM, -int32_t(tileY) * KNOB_MACROTILE_Y_DIM); + + // Make maximums inclusive (needed for convert to raster tiles) + clearTile.xmax -= 1; + clearTile.ymax -= 1; // convert to raster tiles - top >>= (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT); - bottom >>= (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT); - left >>= (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT); - right >>= (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT); + clearTile.ymin >>= (KNOB_TILE_Y_DIM_SHIFT); + clearTile.ymax >>= (KNOB_TILE_Y_DIM_SHIFT); + clearTile.xmin >>= (KNOB_TILE_X_DIM_SHIFT); + clearTile.xmax >>= (KNOB_TILE_X_DIM_SHIFT); - const int numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount); + const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount); // compute steps between raster tile samples / raster tiles / macro tile rows const uint32_t rasterTileSampleStep = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits::bpp / 8; const uint32_t rasterTileStep = (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits::bpp / 8)) * numSamples; @@ -155,16 +156,16 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui const uint32_t pitch = (FormatTraits::bpp * KNOB_MACROTILE_X_DIM / 8); HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, macroTile, rt, true, numSamples); - uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits::bpp > >(pitch, left, top)) * numSamples; + uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits::bpp > >(pitch, clearTile.xmin, clearTile.ymin)) * numSamples; uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits::bpp > >(pitch, x, y)) * numSamples; // loop over all raster tiles in the current hot tile - for (int y = top; y <= bottom; ++y) + for (int32_t y = clearTile.ymin; y <= clearTile.ymax; ++y) { uint8_t* pRasterTile = pRasterTileRow; - for (int x = left; x <= right; ++x) + for (int32_t x = clearTile.xmin; x <= clearTile.xmax; ++x) { - for( int sampleNum = 0; sampleNum < numSamples; sampleNum++) + for( int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++) { ClearRasterTile(pRasterTile, vClear); pRasterTile += rasterTileSampleStep; @@ -241,7 +242,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[KNOB_COLOR_HOT_TILE_FORMAT]; SWR_ASSERT(pfnClearTiles != nullptr); - pfnClearTiles(pDC, SWR_ATTACHMENT_COLOR0, macroTile, clearData); + pfnClearTiles(pDC, SWR_ATTACHMENT_COLOR0, macroTile, clearData, pClear->rect); } if (pClear->flags.mask & SWR_CLEAR_DEPTH) @@ -251,7 +252,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT]; SWR_ASSERT(pfnClearTiles != nullptr); - pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, clearData); + pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, clearData, pClear->rect); } if (pClear->flags.mask & SWR_CLEAR_STENCIL) @@ -261,7 +262,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo clearData[0] = *(DWORD*)&value; PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT]; - pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, clearData); + pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, clearData, pClear->rect); } RDTSC_STOP(BEClear, 0, 0); @@ -307,13 +308,13 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[srcFormat]; SWR_ASSERT(pfnClearTiles != nullptr); - pfnClearTiles(pDC, pDesc->attachment, macroTile, pHotTile->clearData); + pfnClearTiles(pDC, pDesc->attachment, macroTile, pHotTile->clearData, pDesc->rect); } if (pHotTile->state == HOTTILE_DIRTY || pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY) { - int destX = KNOB_MACROTILE_X_DIM * x; - int destY = KNOB_MACROTILE_Y_DIM * y; + int32_t destX = KNOB_MACROTILE_X_DIM * x; + int32_t destY = KNOB_MACROTILE_Y_DIM * y; pContext->pfnStoreTile(GetPrivateState(pDC), srcFormat, pDesc->attachment, destX, destY, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); @@ -334,7 +335,7 @@ void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint3 DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC *)pData; SWR_CONTEXT *pContext = pDC->pContext; - const int numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount); + const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount); for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; ++i) { diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index 0a85ebe..8182053 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -109,6 +109,7 @@ union CLEAR_FLAGS struct CLEAR_DESC { + SWR_RECT rect; CLEAR_FLAGS flags; float clearRTColor[4]; // RGBA_32F float clearDepth; // [0..1] @@ -136,6 +137,7 @@ struct STORE_TILES_DESC { SWR_RENDERTARGET_ATTACHMENT attachment; SWR_TILE_STATE postStoreTileState; + SWR_RECT rect; }; struct COMPUTE_DESC @@ -271,8 +273,8 @@ OSALIGNLINE(struct) API_STATE SWR_VIEWPORT vp[KNOB_NUM_VIEWPORTS_SCISSORS]; SWR_VIEWPORT_MATRICES vpMatrices; - BBOX scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS]; - BBOX scissorInFixedPoint; + SWR_RECT scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS]; + SWR_RECT scissorInFixedPoint; // Backend state SWR_BACKEND_STATE backendState; @@ -494,8 +496,5 @@ struct SWR_CONTEXT TileSet singleThreadLockedTiles; }; -void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId); -void WakeAllThreads(SWR_CONTEXT *pContext); - #define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.stats[workerId].name += count; } #define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.statsFE.name += count; } diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 2809502..04c62ad 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -93,26 +93,24 @@ void ProcessClear( uint32_t workerId, void *pUserData) { - CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData; + CLEAR_DESC *pDesc = (CLEAR_DESC*)pUserData; MacroTileMgr *pTileMgr = pDC->pTileMgr; - const API_STATE& state = GetApiState(pDC); - // queue a clear to each macro tile - // compute macro tile bounds for the current scissor/viewport - uint32_t macroTileLeft = state.scissorInFixedPoint.left / KNOB_MACROTILE_X_DIM_FIXED; - uint32_t macroTileRight = state.scissorInFixedPoint.right / KNOB_MACROTILE_X_DIM_FIXED; - uint32_t macroTileTop = state.scissorInFixedPoint.top / KNOB_MACROTILE_Y_DIM_FIXED; - uint32_t macroTileBottom = state.scissorInFixedPoint.bottom / KNOB_MACROTILE_Y_DIM_FIXED; + // compute macro tile bounds for the specified rect + uint32_t macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM; + uint32_t macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM; + uint32_t macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM; + uint32_t macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM; BE_WORK work; work.type = CLEAR; work.pfnWork = ProcessClearBE; - work.desc.clear = *pClear; + work.desc.clear = *pDesc; - for (uint32_t y = macroTileTop; y <= macroTileBottom; ++y) + for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y) { - for (uint32_t x = macroTileLeft; x <= macroTileRight; ++x) + for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x) { pTileMgr->enqueue(x, y, &work); } @@ -133,28 +131,25 @@ void ProcessStoreTiles( void *pUserData) { RDTSC_START(FEProcessStoreTiles); - STORE_TILES_DESC *pStore = (STORE_TILES_DESC*)pUserData; MacroTileMgr *pTileMgr = pDC->pTileMgr; - - const API_STATE& state = GetApiState(pDC); + STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData; // queue a store to each macro tile - // compute macro tile bounds for the current render target - const uint32_t macroWidth = KNOB_MACROTILE_X_DIM; - const uint32_t macroHeight = KNOB_MACROTILE_Y_DIM; - - uint32_t numMacroTilesX = ((uint32_t)state.vp[0].width + (uint32_t)state.vp[0].x + (macroWidth - 1)) / macroWidth; - uint32_t numMacroTilesY = ((uint32_t)state.vp[0].height + (uint32_t)state.vp[0].y + (macroHeight - 1)) / macroHeight; + // compute macro tile bounds for the specified rect + uint32_t macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM; + uint32_t macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM; + uint32_t macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM; + uint32_t macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM; // store tiles BE_WORK work; work.type = STORETILES; work.pfnWork = ProcessStoreTileBE; - work.desc.storeTiles = *pStore; + work.desc.storeTiles = *pDesc; - for (uint32_t x = 0; x < numMacroTilesX; ++x) + for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y) { - for (uint32_t y = 0; y < numMacroTilesY; ++y) + for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x) { pTileMgr->enqueue(x, y, &work); } @@ -177,64 +172,39 @@ void ProcessDiscardInvalidateTiles( void *pUserData) { RDTSC_START(FEProcessInvalidateTiles); - DISCARD_INVALIDATE_TILES_DESC *pInv = (DISCARD_INVALIDATE_TILES_DESC*)pUserData; + DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData; MacroTileMgr *pTileMgr = pDC->pTileMgr; - SWR_RECT rect; + // compute macro tile bounds for the specified rect + uint32_t macroTileXMin = (pDesc->rect.xmin + KNOB_MACROTILE_X_DIM - 1) / KNOB_MACROTILE_X_DIM; + uint32_t macroTileXMax = (pDesc->rect.xmax / KNOB_MACROTILE_X_DIM) - 1; + uint32_t macroTileYMin = (pDesc->rect.ymin + KNOB_MACROTILE_Y_DIM - 1) / KNOB_MACROTILE_Y_DIM; + uint32_t macroTileYMax = (pDesc->rect.ymax / KNOB_MACROTILE_Y_DIM) - 1; - if (pInv->rect.top | pInv->rect.bottom | pInv->rect.right | pInv->rect.left) - { - // Valid rect - rect = pInv->rect; - } - else - { - // Use viewport dimensions - const API_STATE& state = GetApiState(pDC); - - rect.left = (uint32_t)state.vp[0].x; - rect.right = (uint32_t)(state.vp[0].x + state.vp[0].width); - rect.top = (uint32_t)state.vp[0].y; - rect.bottom = (uint32_t)(state.vp[0].y + state.vp[0].height); - } - - // queue a store to each macro tile - // compute macro tile bounds for the current render target - uint32_t macroWidth = KNOB_MACROTILE_X_DIM; - uint32_t macroHeight = KNOB_MACROTILE_Y_DIM; - - // Setup region assuming full tiles - uint32_t macroTileStartX = (rect.left + (macroWidth - 1)) / macroWidth; - uint32_t macroTileStartY = (rect.top + (macroHeight - 1)) / macroHeight; - - uint32_t macroTileEndX = rect.right / macroWidth; - uint32_t macroTileEndY = rect.bottom / macroHeight; - - if (pInv->fullTilesOnly == false) + if (pDesc->fullTilesOnly == false) { // include partial tiles - macroTileStartX = rect.left / macroWidth; - macroTileStartY = rect.top / macroHeight; - - macroTileEndX = (rect.right + macroWidth - 1) / macroWidth; - macroTileEndY = (rect.bottom + macroHeight - 1) / macroHeight; + macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM; + macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM; + macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM; + macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM; } - SWR_ASSERT(macroTileEndX <= KNOB_NUM_HOT_TILES_X); - SWR_ASSERT(macroTileEndY <= KNOB_NUM_HOT_TILES_Y); + SWR_ASSERT(macroTileXMax <= KNOB_NUM_HOT_TILES_X); + SWR_ASSERT(macroTileYMax <= KNOB_NUM_HOT_TILES_Y); - macroTileEndX = std::min(macroTileEndX, KNOB_NUM_HOT_TILES_X); - macroTileEndY = std::min(macroTileEndY, KNOB_NUM_HOT_TILES_Y); + macroTileXMax = std::min(macroTileXMax, KNOB_NUM_HOT_TILES_X); + macroTileYMax = std::min(macroTileYMax, KNOB_NUM_HOT_TILES_Y); // load tiles BE_WORK work; work.type = DISCARDINVALIDATETILES; work.pfnWork = ProcessDiscardInvalidateTilesBE; - work.desc.discardInvalidateTiles = *pInv; + work.desc.discardInvalidateTiles = *pDesc; - for (uint32_t x = macroTileStartX; x < macroTileEndX; ++x) + for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x) { - for (uint32_t y = macroTileStartY; y < macroTileEndY; ++y) + for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y) { pTileMgr->enqueue(x, y, &work); } @@ -587,7 +557,7 @@ static void StreamOut( ////////////////////////////////////////////////////////////////////////// /// @brief Computes number of invocations. The current index represents /// the start of the SIMD. The max index represents how much work -/// items are remaining. If there is less then a SIMD's left of work +/// items are remaining. If there is less then a SIMD's xmin of work /// then return the remaining amount of work. /// @param curIndex - The start index for the SIMD. /// @param maxIndex - The last index for all work items. @@ -1694,10 +1664,10 @@ INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari vMaxY = _simd_max_epi32(vMaxY, vY[1]); vMaxY = _simd_max_epi32(vMaxY, vY[2]); - bbox.left = vMinX; - bbox.right = vMaxX; - bbox.top = vMinY; - bbox.bottom = vMaxY; + bbox.xmin = vMinX; + bbox.xmax = vMaxX; + bbox.ymin = vMinY; + bbox.ymax = vMaxY; } ////////////////////////////////////////////////////////////////////////// @@ -1727,10 +1697,10 @@ INLINE void calcBoundingBoxIntVertical(const simdvector * c /// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative rasterization /// expand bbox by 1/256; coverage will be correctly handled in the rasterizer. - bbox.left = _simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)); - bbox.right = _simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)); - bbox.top = _simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)); - bbox.bottom = _simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)); + bbox.xmin = _simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)); + bbox.xmax = _simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)); + bbox.ymin = _simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)); + bbox.ymax = _simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value)); } ////////////////////////////////////////////////////////////////////////// @@ -1940,27 +1910,27 @@ void BinTriangles( // determine if triangle falls between pixel centers and discard // only discard for non-MSAA case and when conservative rast is disabled - // (left + 127) & ~255 - // (right + 128) & ~255 + // (xmin + 127) & ~255 + // (xmax + 128) & ~255 if(rastState.sampleCount == SWR_MULTISAMPLE_1X && (!CT::IsConservativeT::value)) { origTriMask = triMask; int cullCenterMask; { - simdscalari left = _simd_add_epi32(bbox.left, _simd_set1_epi32(127)); - left = _simd_and_si(left, _simd_set1_epi32(~255)); - simdscalari right = _simd_add_epi32(bbox.right, _simd_set1_epi32(128)); - right = _simd_and_si(right, _simd_set1_epi32(~255)); + simdscalari xmin = _simd_add_epi32(bbox.xmin, _simd_set1_epi32(127)); + xmin = _simd_and_si(xmin, _simd_set1_epi32(~255)); + simdscalari xmax = _simd_add_epi32(bbox.xmax, _simd_set1_epi32(128)); + xmax = _simd_and_si(xmax, _simd_set1_epi32(~255)); - simdscalari vMaskH = _simd_cmpeq_epi32(left, right); + simdscalari vMaskH = _simd_cmpeq_epi32(xmin, xmax); - simdscalari top = _simd_add_epi32(bbox.top, _simd_set1_epi32(127)); - top = _simd_and_si(top, _simd_set1_epi32(~255)); - simdscalari bottom = _simd_add_epi32(bbox.bottom, _simd_set1_epi32(128)); - bottom = _simd_and_si(bottom, _simd_set1_epi32(~255)); + simdscalari ymin = _simd_add_epi32(bbox.ymin, _simd_set1_epi32(127)); + ymin = _simd_and_si(ymin, _simd_set1_epi32(~255)); + simdscalari ymax = _simd_add_epi32(bbox.ymax, _simd_set1_epi32(128)); + ymax = _simd_and_si(ymax, _simd_set1_epi32(~255)); - simdscalari vMaskV = _simd_cmpeq_epi32(top, bottom); + simdscalari vMaskV = _simd_cmpeq_epi32(ymin, ymax); vMaskV = _simd_or_si(vMaskH, vMaskV); cullCenterMask = _simd_movemask_ps(_simd_castsi_ps(vMaskV)); } @@ -1973,26 +1943,26 @@ void BinTriangles( } } - // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since right/bottom edge is exclusive. - bbox.left = _simd_max_epi32(bbox.left, _simd_set1_epi32(state.scissorInFixedPoint.left)); - bbox.top = _simd_max_epi32(bbox.top, _simd_set1_epi32(state.scissorInFixedPoint.top)); - bbox.right = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right)); - bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom)); + // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive. + bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin)); + bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin)); + bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax)); + bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax)); if(CT::IsConservativeT::value) { // in the case where a degenerate triangle is on a scissor edge, we need to make sure the primitive bbox has - // some area. Bump the right/bottom edges out - simdscalari topEqualsBottom = _simd_cmpeq_epi32(bbox.top, bbox.bottom); - bbox.bottom = _simd_blendv_epi32(bbox.bottom, _simd_add_epi32(bbox.bottom, _simd_set1_epi32(1)), topEqualsBottom); - simdscalari leftEqualsRight = _simd_cmpeq_epi32(bbox.left, bbox.right); - bbox.right = _simd_blendv_epi32(bbox.right, _simd_add_epi32(bbox.right, _simd_set1_epi32(1)), leftEqualsRight); + // some area. Bump the xmax/ymax edges out + simdscalari topEqualsBottom = _simd_cmpeq_epi32(bbox.ymin, bbox.ymax); + bbox.ymax = _simd_blendv_epi32(bbox.ymax, _simd_add_epi32(bbox.ymax, _simd_set1_epi32(1)), topEqualsBottom); + simdscalari leftEqualsRight = _simd_cmpeq_epi32(bbox.xmin, bbox.xmax); + bbox.xmax = _simd_blendv_epi32(bbox.xmax, _simd_add_epi32(bbox.xmax, _simd_set1_epi32(1)), leftEqualsRight); } // Cull tris completely outside scissor { - simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right); - simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.top, bbox.bottom); + simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.xmin, bbox.xmax); + simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.ymin, bbox.ymax); simdscalari maskOutsideScissorXY = _simd_or_si(maskOutsideScissorX, maskOutsideScissorY); uint32_t maskOutsideScissor = _simd_movemask_ps(_simd_castsi_ps(maskOutsideScissorXY)); triMask = triMask & ~maskOutsideScissor; @@ -2004,16 +1974,16 @@ void BinTriangles( } // Convert triangle bbox to macrotile units. - bbox.left = _simd_srai_epi32(bbox.left, KNOB_MACROTILE_X_DIM_FIXED_SHIFT); - bbox.top = _simd_srai_epi32(bbox.top, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT); - bbox.right = _simd_srai_epi32(bbox.right, KNOB_MACROTILE_X_DIM_FIXED_SHIFT); - bbox.bottom = _simd_srai_epi32(bbox.bottom, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT); + bbox.xmin = _simd_srai_epi32(bbox.xmin, KNOB_MACROTILE_X_DIM_FIXED_SHIFT); + bbox.ymin = _simd_srai_epi32(bbox.ymin, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT); + bbox.xmax = _simd_srai_epi32(bbox.xmax, KNOB_MACROTILE_X_DIM_FIXED_SHIFT); + bbox.ymax = _simd_srai_epi32(bbox.ymax, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT); OSALIGNSIMD(uint32_t) aMTLeft[KNOB_SIMD_WIDTH], aMTRight[KNOB_SIMD_WIDTH], aMTTop[KNOB_SIMD_WIDTH], aMTBottom[KNOB_SIMD_WIDTH]; - _simd_store_si((simdscalari*)aMTLeft, bbox.left); - _simd_store_si((simdscalari*)aMTRight, bbox.right); - _simd_store_si((simdscalari*)aMTTop, bbox.top); - _simd_store_si((simdscalari*)aMTBottom, bbox.bottom); + _simd_store_si((simdscalari*)aMTLeft, bbox.xmin); + _simd_store_si((simdscalari*)aMTRight, bbox.xmax); + _simd_store_si((simdscalari*)aMTTop, bbox.ymin); + _simd_store_si((simdscalari*)aMTBottom, bbox.ymax); // transpose verts needed for backend /// @todo modify BE to take non-transformed verts @@ -2196,11 +2166,11 @@ void BinPoints( if (CanUseSimplePoints(pDC)) { - // adjust for top-left rule + // adjust for ymin-xmin rule vXi = _simd_sub_epi32(vXi, _simd_set1_epi32(1)); vYi = _simd_sub_epi32(vYi, _simd_set1_epi32(1)); - // cull points off the top-left edge of the viewport + // cull points off the ymin-xmin edge of the viewport primMask &= ~_simd_movemask_ps(_simd_castsi_ps(vXi)); primMask &= ~_simd_movemask_ps(_simd_castsi_ps(vYi)); @@ -2325,40 +2295,40 @@ void BinPoints( // bloat point to bbox simdBBox bbox; - bbox.left = bbox.right = vXi; - bbox.top = bbox.bottom = vYi; + bbox.xmin = bbox.xmax = vXi; + bbox.ymin = bbox.ymax = vYi; simdscalar vHalfWidth = _simd_mul_ps(vPointSize, _simd_set1_ps(0.5f)); simdscalari vHalfWidthi = fpToFixedPointVertical(vHalfWidth); - bbox.left = _simd_sub_epi32(bbox.left, vHalfWidthi); - bbox.right = _simd_add_epi32(bbox.right, vHalfWidthi); - bbox.top = _simd_sub_epi32(bbox.top, vHalfWidthi); - bbox.bottom = _simd_add_epi32(bbox.bottom, vHalfWidthi); + bbox.xmin = _simd_sub_epi32(bbox.xmin, vHalfWidthi); + bbox.xmax = _simd_add_epi32(bbox.xmax, vHalfWidthi); + bbox.ymin = _simd_sub_epi32(bbox.ymin, vHalfWidthi); + bbox.ymax = _simd_add_epi32(bbox.ymax, vHalfWidthi); - // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since right/bottom edge is exclusive. - bbox.left = _simd_max_epi32(bbox.left, _simd_set1_epi32(state.scissorInFixedPoint.left)); - bbox.top = _simd_max_epi32(bbox.top, _simd_set1_epi32(state.scissorInFixedPoint.top)); - bbox.right = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right)); - bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom)); + // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive. + bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin)); + bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin)); + bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax)); + bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax)); // Cull bloated points completely outside scissor - simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right); - simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.top, bbox.bottom); + simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.xmin, bbox.xmax); + simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.ymin, bbox.ymax); simdscalari maskOutsideScissorXY = _simd_or_si(maskOutsideScissorX, maskOutsideScissorY); uint32_t maskOutsideScissor = _simd_movemask_ps(_simd_castsi_ps(maskOutsideScissorXY)); primMask = primMask & ~maskOutsideScissor; // Convert bbox to macrotile units. - bbox.left = _simd_srai_epi32(bbox.left, KNOB_MACROTILE_X_DIM_FIXED_SHIFT); - bbox.top = _simd_srai_epi32(bbox.top, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT); - bbox.right = _simd_srai_epi32(bbox.right, KNOB_MACROTILE_X_DIM_FIXED_SHIFT); - bbox.bottom = _simd_srai_epi32(bbox.bottom, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT); + bbox.xmin = _simd_srai_epi32(bbox.xmin, KNOB_MACROTILE_X_DIM_FIXED_SHIFT); + bbox.ymin = _simd_srai_epi32(bbox.ymin, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT); + bbox.xmax = _simd_srai_epi32(bbox.xmax, KNOB_MACROTILE_X_DIM_FIXED_SHIFT); + bbox.ymax = _simd_srai_epi32(bbox.ymax, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT); OSALIGNSIMD(uint32_t) aMTLeft[KNOB_SIMD_WIDTH], aMTRight[KNOB_SIMD_WIDTH], aMTTop[KNOB_SIMD_WIDTH], aMTBottom[KNOB_SIMD_WIDTH]; - _simd_store_si((simdscalari*)aMTLeft, bbox.left); - _simd_store_si((simdscalari*)aMTRight, bbox.right); - _simd_store_si((simdscalari*)aMTTop, bbox.top); - _simd_store_si((simdscalari*)aMTBottom, bbox.bottom); + _simd_store_si((simdscalari*)aMTLeft, bbox.xmin); + _simd_store_si((simdscalari*)aMTRight, bbox.xmax); + _simd_store_si((simdscalari*)aMTTop, bbox.ymin); + _simd_store_si((simdscalari*)aMTBottom, bbox.ymax); // store render target array index OSALIGNSIMD(uint32_t) aRTAI[KNOB_SIMD_WIDTH]; @@ -2543,35 +2513,35 @@ void BinLines( // Calc bounding box of lines simdBBox bbox; - bbox.left = _simd_min_epi32(vXi[0], vXi[1]); - bbox.right = _simd_max_epi32(vXi[0], vXi[1]); - bbox.top = _simd_min_epi32(vYi[0], vYi[1]); - bbox.bottom = _simd_max_epi32(vYi[0], vYi[1]); + bbox.xmin = _simd_min_epi32(vXi[0], vXi[1]); + bbox.xmax = _simd_max_epi32(vXi[0], vXi[1]); + bbox.ymin = _simd_min_epi32(vYi[0], vYi[1]); + bbox.ymax = _simd_max_epi32(vYi[0], vYi[1]); // bloat bbox by line width along minor axis simdscalar vHalfWidth = _simd_set1_ps(rastState.lineWidth / 2.0f); simdscalari vHalfWidthi = fpToFixedPointVertical(vHalfWidth); simdBBox bloatBox; - bloatBox.left = _simd_sub_epi32(bbox.left, vHalfWidthi); - bloatBox.right = _simd_add_epi32(bbox.right, vHalfWidthi); - bloatBox.top = _simd_sub_epi32(bbox.top, vHalfWidthi); - bloatBox.bottom = _simd_add_epi32(bbox.bottom, vHalfWidthi); - - bbox.left = _simd_blendv_epi32(bbox.left, bloatBox.left, vYmajorMask); - bbox.right = _simd_blendv_epi32(bbox.right, bloatBox.right, vYmajorMask); - bbox.top = _simd_blendv_epi32(bloatBox.top, bbox.top, vYmajorMask); - bbox.bottom = _simd_blendv_epi32(bloatBox.bottom, bbox.bottom, vYmajorMask); - - // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since right/bottom edge is exclusive. - bbox.left = _simd_max_epi32(bbox.left, _simd_set1_epi32(state.scissorInFixedPoint.left)); - bbox.top = _simd_max_epi32(bbox.top, _simd_set1_epi32(state.scissorInFixedPoint.top)); - bbox.right = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right)); - bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom)); + bloatBox.xmin = _simd_sub_epi32(bbox.xmin, vHalfWidthi); + bloatBox.xmax = _simd_add_epi32(bbox.xmax, vHalfWidthi); + bloatBox.ymin = _simd_sub_epi32(bbox.ymin, vHalfWidthi); + bloatBox.ymax = _simd_add_epi32(bbox.ymax, vHalfWidthi); + + bbox.xmin = _simd_blendv_epi32(bbox.xmin, bloatBox.xmin, vYmajorMask); + bbox.xmax = _simd_blendv_epi32(bbox.xmax, bloatBox.xmax, vYmajorMask); + bbox.ymin = _simd_blendv_epi32(bloatBox.ymin, bbox.ymin, vYmajorMask); + bbox.ymax = _simd_blendv_epi32(bloatBox.ymax, bbox.ymax, vYmajorMask); + + // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive. + bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin)); + bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin)); + bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax)); + bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax)); // Cull prims completely outside scissor { - simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right); - simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.top, bbox.bottom); + simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.xmin, bbox.xmax); + simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.ymin, bbox.ymax); simdscalari maskOutsideScissorXY = _simd_or_si(maskOutsideScissorX, maskOutsideScissorY); uint32_t maskOutsideScissor = _simd_movemask_ps(_simd_castsi_ps(maskOutsideScissorXY)); primMask = primMask & ~maskOutsideScissor; @@ -2583,16 +2553,16 @@ void BinLines( } // Convert triangle bbox to macrotile units. - bbox.left = _simd_srai_epi32(bbox.left, KNOB_MACROTILE_X_DIM_FIXED_SHIFT); - bbox.top = _simd_srai_epi32(bbox.top, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT); - bbox.right = _simd_srai_epi32(bbox.right, KNOB_MACROTILE_X_DIM_FIXED_SHIFT); - bbox.bottom = _simd_srai_epi32(bbox.bottom, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT); + bbox.xmin = _simd_srai_epi32(bbox.xmin, KNOB_MACROTILE_X_DIM_FIXED_SHIFT); + bbox.ymin = _simd_srai_epi32(bbox.ymin, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT); + bbox.xmax = _simd_srai_epi32(bbox.xmax, KNOB_MACROTILE_X_DIM_FIXED_SHIFT); + bbox.ymax = _simd_srai_epi32(bbox.ymax, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT); OSALIGNSIMD(uint32_t) aMTLeft[KNOB_SIMD_WIDTH], aMTRight[KNOB_SIMD_WIDTH], aMTTop[KNOB_SIMD_WIDTH], aMTBottom[KNOB_SIMD_WIDTH]; - _simd_store_si((simdscalari*)aMTLeft, bbox.left); - _simd_store_si((simdscalari*)aMTRight, bbox.right); - _simd_store_si((simdscalari*)aMTTop, bbox.top); - _simd_store_si((simdscalari*)aMTBottom, bbox.bottom); + _simd_store_si((simdscalari*)aMTLeft, bbox.xmin); + _simd_store_si((simdscalari*)aMTRight, bbox.xmax); + _simd_store_si((simdscalari*)aMTTop, bbox.ymin); + _simd_store_si((simdscalari*)aMTBottom, bbox.ymax); // transpose verts needed for backend /// @todo modify BE to take non-transformed verts diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h index 367d199..6316156 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.h +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h @@ -240,7 +240,7 @@ void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices, } INLINE -void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, BBOX &bbox) +void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, SWR_RECT &bbox) { // Need horizontal fp min here __m128i vX1 = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 2, 0, 1)); @@ -262,10 +262,10 @@ void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, BBOX &bbox) __m128i vMaxY = _mm_max_epi32(vY, vY1); vMaxY = _mm_max_epi32(vMaxY, vY2); - bbox.left = _mm_extract_epi32(vMinX, 0); - bbox.right = _mm_extract_epi32(vMaxX, 0); - bbox.top = _mm_extract_epi32(vMinY, 0); - bbox.bottom = _mm_extract_epi32(vMaxY, 0); + bbox.xmin = _mm_extract_epi32(vMinX, 0); + bbox.xmax = _mm_extract_epi32(vMaxX, 0); + bbox.ymin = _mm_extract_epi32(vMinY, 0); + bbox.ymax = _mm_extract_epi32(vMaxY, 0); } INLINE diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp index 768b58a..9a8d062 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp @@ -591,7 +591,7 @@ INLINE void UpdateEdgeMasks(const __m256d(&)[3], const __m256d* v template struct ComputeScissorEdges { - INLINE ComputeScissorEdges(const BBOX &triBBox, const BBOX &scissorBBox, const int32_t x, const int32_t y, + INLINE ComputeScissorEdges(const SWR_RECT &triBBox, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y, EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7]){}; }; @@ -604,20 +604,20 @@ struct ComputeScissorEdges ////////////////////////////////////////////////////////////////////////// /// @brief Intersect tri bbox with scissor, compute scissor edge vectors, /// evaluate edge equations and offset them away from pixel center. - INLINE ComputeScissorEdges(const BBOX &triBBox, const BBOX &scissorBBox, const int32_t x, const int32_t y, + INLINE ComputeScissorEdges(const SWR_RECT &triBBox, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y, EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7]) { // if conservative rasterizing, triangle bbox intersected with scissor bbox is used - BBOX scissor; - scissor.left = std::max(triBBox.left, scissorBBox.left); - scissor.right = std::min(triBBox.right, scissorBBox.right); - scissor.top = std::max(triBBox.top, scissorBBox.top); - scissor.bottom = std::min(triBBox.bottom, scissorBBox.bottom); + SWR_RECT scissor; + scissor.xmin = std::max(triBBox.xmin, scissorBBox.xmin); + scissor.xmax = std::min(triBBox.xmax, scissorBBox.xmax); + scissor.ymin = std::max(triBBox.ymin, scissorBBox.ymin); + scissor.ymax = std::min(triBBox.ymax, scissorBBox.ymax); - POS topLeft{scissor.left, scissor.top}; - POS bottomLeft{scissor.left, scissor.bottom}; - POS topRight{scissor.right, scissor.top}; - POS bottomRight{scissor.right, scissor.bottom}; + POS topLeft{scissor.xmin, scissor.ymin}; + POS bottomLeft{scissor.xmin, scissor.ymax}; + POS topRight{scissor.xmax, scissor.ymin}; + POS bottomRight{scissor.xmax, scissor.ymax}; // construct 4 scissor edges in ccw direction ComputeEdgeData(topLeft, bottomLeft, rastEdges[3]); @@ -625,10 +625,10 @@ struct ComputeScissorEdges ComputeEdgeData(bottomRight, topRight, rastEdges[5]); ComputeEdgeData(topRight, topLeft, rastEdges[6]); - vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.left)) + (rastEdges[3].b * (y - scissor.top))); - vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.left)) + (rastEdges[4].b * (y - scissor.bottom))); - vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.right)) + (rastEdges[5].b * (y - scissor.bottom))); - vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.right)) + (rastEdges[6].b * (y - scissor.top))); + vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) + (rastEdges[3].b * (y - scissor.ymin))); + vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) + (rastEdges[4].b * (y - scissor.ymax))); + vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) + (rastEdges[5].b * (y - scissor.ymax))); + vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) + (rastEdges[6].b * (y - scissor.ymin))); // if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing adjustScissorEdge(rastEdges[3].a, rastEdges[3].b, vEdgeFix16[3]); @@ -647,14 +647,14 @@ struct ComputeScissorEdges { ////////////////////////////////////////////////////////////////////////// /// @brief Compute scissor edge vectors and evaluate edge equations - INLINE ComputeScissorEdges(const BBOX &, const BBOX &scissorBBox, const int32_t x, const int32_t y, + INLINE ComputeScissorEdges(const SWR_RECT &, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y, EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7]) { - const BBOX &scissor = scissorBBox; - POS topLeft{scissor.left, scissor.top}; - POS bottomLeft{scissor.left, scissor.bottom}; - POS topRight{scissor.right, scissor.top}; - POS bottomRight{scissor.right, scissor.bottom}; + const SWR_RECT &scissor = scissorBBox; + POS topLeft{scissor.xmin, scissor.ymin}; + POS bottomLeft{scissor.xmin, scissor.ymax}; + POS topRight{scissor.xmax, scissor.ymin}; + POS bottomRight{scissor.xmax, scissor.ymax}; // construct 4 scissor edges in ccw direction ComputeEdgeData(topLeft, bottomLeft, rastEdges[3]); @@ -662,10 +662,10 @@ struct ComputeScissorEdges ComputeEdgeData(bottomRight, topRight, rastEdges[5]); ComputeEdgeData(topRight, topLeft, rastEdges[6]); - vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.left)) + (rastEdges[3].b * (y - scissor.top))); - vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.left)) + (rastEdges[4].b * (y - scissor.bottom))); - vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.right)) + (rastEdges[5].b * (y - scissor.bottom))); - vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.right)) + (rastEdges[6].b * (y - scissor.top))); + vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) + (rastEdges[3].b * (y - scissor.ymin))); + vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) + (rastEdges[4].b * (y - scissor.ymax))); + vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) + (rastEdges[5].b * (y - scissor.ymax))); + vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) + (rastEdges[6].b * (y - scissor.ymin))); } }; @@ -964,23 +964,23 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, triDesc.Z[2] += ComputeDepthBias(&rastState, &triDesc, workDesc.pTriBuffer + 8); // Calc bounding box of triangle - OSALIGNSIMD(BBOX) bbox; + OSALIGNSIMD(SWR_RECT) bbox; calcBoundingBoxInt(vXi, vYi, bbox); if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID) { // If we're rasterizing a degenerate triangle, expand bounding box to guarantee the BBox is valid - bbox.left--; bbox.right++; bbox.top--; bbox.bottom++; - SWR_ASSERT(state.scissorInFixedPoint.left >= 0 && state.scissorInFixedPoint.top >= 0, + bbox.xmin--; bbox.xmax++; bbox.ymin--; bbox.ymax++; + SWR_ASSERT(state.scissorInFixedPoint.xmin >= 0 && state.scissorInFixedPoint.ymin >= 0, "Conservative rast degenerate handling requires a valid scissor rect"); } // Intersect with scissor/viewport - OSALIGNSIMD(BBOX) intersect; - intersect.left = std::max(bbox.left, state.scissorInFixedPoint.left); - intersect.right = std::min(bbox.right - 1, state.scissorInFixedPoint.right); - intersect.top = std::max(bbox.top, state.scissorInFixedPoint.top); - intersect.bottom = std::min(bbox.bottom - 1, state.scissorInFixedPoint.bottom); + OSALIGNSIMD(SWR_RECT) intersect; + intersect.xmin = std::max(bbox.xmin, state.scissorInFixedPoint.xmin); + intersect.xmax = std::min(bbox.xmax - 1, state.scissorInFixedPoint.xmax); + intersect.ymin = std::max(bbox.ymin, state.scissorInFixedPoint.ymin); + intersect.ymax = std::min(bbox.ymax - 1, state.scissorInFixedPoint.ymax); triDesc.triFlags = workDesc.triFlags; @@ -992,20 +992,20 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED; int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1; - intersect.left = std::max(intersect.left, macroBoxLeft); - intersect.top = std::max(intersect.top, macroBoxTop); - intersect.right = std::min(intersect.right, macroBoxRight); - intersect.bottom = std::min(intersect.bottom, macroBoxBottom); + intersect.xmin = std::max(intersect.xmin, macroBoxLeft); + intersect.ymin = std::max(intersect.ymin, macroBoxTop); + intersect.xmax = std::min(intersect.xmax, macroBoxRight); + intersect.ymax = std::min(intersect.ymax, macroBoxBottom); - SWR_ASSERT(intersect.left <= intersect.right && intersect.top <= intersect.bottom && intersect.left >= 0 && intersect.right >= 0 && intersect.top >= 0 && intersect.bottom >= 0); + SWR_ASSERT(intersect.xmin <= intersect.xmax && intersect.ymin <= intersect.ymax && intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 && intersect.ymax >= 0); RDTSC_STOP(BETriangleSetup, 0, pDC->drawId); // update triangle desc - uint32_t minTileX = intersect.left >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT); - uint32_t minTileY = intersect.top >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT); - uint32_t maxTileX = intersect.right >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT); - uint32_t maxTileY = intersect.bottom >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT); + uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT); + uint32_t minTileY = intersect.ymin >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT); + uint32_t maxTileX = intersect.xmax >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT); + uint32_t maxTileY = intersect.ymax >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT); uint32_t numTilesX = maxTileX - minTileX + 1; uint32_t numTilesY = maxTileY - minTileY + 1; @@ -1020,8 +1020,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, // Step to pixel center of top-left pixel of the triangle bbox // Align intersect bbox (top/left) to raster tile's (top/left). - int32_t x = AlignDown(intersect.left, (FIXED_POINT_SCALE * KNOB_TILE_X_DIM)); - int32_t y = AlignDown(intersect.top, (FIXED_POINT_SCALE * KNOB_TILE_Y_DIM)); + int32_t x = AlignDown(intersect.xmin, (FIXED_POINT_SCALE * KNOB_TILE_X_DIM)); + int32_t y = AlignDown(intersect.ymin, (FIXED_POINT_SCALE * KNOB_TILE_Y_DIM)); // convenience typedef typedef typename RT::NumRasterSamplesT NumRasterSamplesT; @@ -1663,17 +1663,17 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi // make sure this macrotile intersects the triangle __m128i vXai = fpToFixedPoint(vXa); __m128i vYai = fpToFixedPoint(vYa); - OSALIGNSIMD(BBOX) bboxA; + OSALIGNSIMD(SWR_RECT) bboxA; calcBoundingBoxInt(vXai, vYai, bboxA); - if (!(bboxA.left > macroBoxRight || - bboxA.left > state.scissorInFixedPoint.right || - bboxA.right - 1 < macroBoxLeft || - bboxA.right - 1 < state.scissorInFixedPoint.left || - bboxA.top > macroBoxBottom || - bboxA.top > state.scissorInFixedPoint.bottom || - bboxA.bottom - 1 < macroBoxTop || - bboxA.bottom - 1 < state.scissorInFixedPoint.top)) { + if (!(bboxA.xmin > macroBoxRight || + bboxA.xmin > state.scissorInFixedPoint.xmax || + bboxA.xmax - 1 < macroBoxLeft || + bboxA.xmax - 1 < state.scissorInFixedPoint.xmin || + bboxA.ymin > macroBoxBottom || + bboxA.ymin > state.scissorInFixedPoint.ymax || + bboxA.ymax - 1 < macroBoxTop || + bboxA.ymax - 1 < state.scissorInFixedPoint.ymin)) { // rasterize triangle pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc); } @@ -1739,14 +1739,14 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi vYai = fpToFixedPoint(vYa); calcBoundingBoxInt(vXai, vYai, bboxA); - if (!(bboxA.left > macroBoxRight || - bboxA.left > state.scissorInFixedPoint.right || - bboxA.right - 1 < macroBoxLeft || - bboxA.right - 1 < state.scissorInFixedPoint.left || - bboxA.top > macroBoxBottom || - bboxA.top > state.scissorInFixedPoint.bottom || - bboxA.bottom - 1 < macroBoxTop || - bboxA.bottom - 1 < state.scissorInFixedPoint.top)) { + if (!(bboxA.xmin > macroBoxRight || + bboxA.xmin > state.scissorInFixedPoint.xmax || + bboxA.xmax - 1 < macroBoxLeft || + bboxA.xmax - 1 < state.scissorInFixedPoint.xmin || + bboxA.ymin > macroBoxBottom || + bboxA.ymin > state.scissorInFixedPoint.ymax || + bboxA.ymax - 1 < macroBoxTop || + bboxA.ymax - 1 < state.scissorInFixedPoint.ymin)) { // rasterize triangle pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc); } diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.h b/src/gallium/drivers/swr/rasterizer/core/utils.h index 0a9430a..0087892 100644 --- a/src/gallium/drivers/swr/rasterizer/core/utils.h +++ b/src/gallium/drivers/swr/rasterizer/core/utils.h @@ -33,6 +33,7 @@ #include "common/os.h" #include "common/simdintrin.h" #include "common/swr_assert.h" +#include "core/api.h" #if defined(_WIN64) || defined(__x86_64__) #define _MM_INSERT_EPI64 _mm_insert_epi64 @@ -74,53 +75,12 @@ INLINE __m128i _MM_INSERT_EPI64(__m128i a, int64_t b, const int32_t ndx) } #endif -OSALIGNLINE(struct) BBOX -{ - int top{ 0 }; - int bottom{ 0 }; - int left{ 0 }; - int right{ 0 }; - - BBOX() {} - BBOX(int t, int b, int l, int r) : top(t), bottom(b), left(l), right(r) {} - - bool operator==(const BBOX& rhs) - { - return (this->top == rhs.top && - this->bottom == rhs.bottom && - this->left == rhs.left && - this->right == rhs.right); - } - - bool operator!=(const BBOX& rhs) - { - return !(*this == rhs); - } - - BBOX& Intersect(const BBOX& other) - { - this->top = std::max(this->top, other.top); - this->bottom = std::min(this->bottom, other.bottom); - this->left = std::max(this->left, other.left); - this->right = std::min(this->right, other.right); - - if (right - left < 0 || - bottom - top < 0) - { - // Zero area - top = bottom = left = right = 0; - } - - return *this; - } -}; - struct simdBBox { - simdscalari top; - simdscalari bottom; - simdscalari left; - simdscalari right; + simdscalari ymin; + simdscalari ymax; + simdscalari xmin; + simdscalari xmax; }; INLINE diff --git a/src/gallium/drivers/swr/swr_clear.cpp b/src/gallium/drivers/swr/swr_clear.cpp index 4b02dd1..a65f8f4 100644 --- a/src/gallium/drivers/swr/swr_clear.cpp +++ b/src/gallium/drivers/swr/swr_clear.cpp @@ -67,17 +67,9 @@ swr_clear(struct pipe_context *pipe, ((union pipe_color_union *)color)->f[3] = 1.0; /* cast off your const'd-ness */ #endif - /* Reset viewport to full framebuffer width/height before clear, then - * restore it */ - /* Scissor affects clear, viewport should not */ - ctx->dirty |= SWR_NEW_VIEWPORT; - SWR_VIEWPORT vp = {0}; - vp.width = ctx->framebuffer.width; - vp.height = ctx->framebuffer.height; - SwrSetViewports(ctx->swrContext, 1, &vp, NULL); - swr_update_draw_context(ctx); - SwrClearRenderTarget(ctx->swrContext, clearMask, color->f, depth, stencil); + SwrClearRenderTarget(ctx->swrContext, clearMask, color->f, depth, stencil, + ctx->swr_scissor); } diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h index b4553fb..6854d69 100644 --- a/src/gallium/drivers/swr/swr_context.h +++ b/src/gallium/drivers/swr/swr_context.h @@ -121,6 +121,7 @@ struct swr_context { struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; + SWR_RECT swr_scissor; struct pipe_sampler_view * sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS]; diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp index 0f6a8c6..a46aef9 100644 --- a/src/gallium/drivers/swr/swr_draw.cpp +++ b/src/gallium/drivers/swr/swr_draw.cpp @@ -258,37 +258,13 @@ swr_store_render_target(struct pipe_context *pipe, /* Only proceed if there's a valid surface to store to */ if (renderTarget->pBaseAddress) { - /* Set viewport to full renderTarget width/height and disable scissor - * before StoreTiles */ - boolean change_viewport = - (ctx->derived.vp.x != 0.0f || ctx->derived.vp.y != 0.0f - || ctx->derived.vp.width != renderTarget->width - || ctx->derived.vp.height != renderTarget->height); - if (change_viewport) { - SWR_VIEWPORT vp = {0}; - vp.width = renderTarget->width; - vp.height = renderTarget->height; - SwrSetViewports(ctx->swrContext, 1, &vp, NULL); - } - - boolean scissor_enable = ctx->derived.rastState.scissorEnable; - if (scissor_enable) { - ctx->derived.rastState.scissorEnable = FALSE; - SwrSetRastState(ctx->swrContext, &ctx->derived.rastState); - } - swr_update_draw_context(ctx); + SWR_RECT full_rect = + {0, 0, (int32_t)renderTarget->width, (int32_t)renderTarget->height}; SwrStoreTiles(ctx->swrContext, (enum SWR_RENDERTARGET_ATTACHMENT)attachment, - post_tile_state); - - /* Restore viewport and scissor enable */ - if (change_viewport) - SwrSetViewports(ctx->swrContext, 1, &ctx->derived.vp, &ctx->derived.vpm); - if (scissor_enable) { - ctx->derived.rastState.scissorEnable = scissor_enable; - SwrSetRastState(ctx->swrContext, &ctx->derived.rastState); - } + post_tile_state, + full_rect); } } diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index de41ddc..4c9a432 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -575,6 +575,10 @@ swr_set_scissor_states(struct pipe_context *pipe, struct swr_context *ctx = swr_context(pipe); ctx->scissor = *scissor; + ctx->swr_scissor.xmin = scissor->minx; + ctx->swr_scissor.xmax = scissor->maxx; + ctx->swr_scissor.ymin = scissor->miny; + ctx->swr_scissor.ymax = scissor->maxy; ctx->dirty |= SWR_NEW_SCISSOR; } @@ -930,10 +934,7 @@ swr_update_derived(struct pipe_context *pipe, /* Scissor */ if (ctx->dirty & SWR_NEW_SCISSOR) { - pipe_scissor_state *scissor = &ctx->scissor; - BBOX bbox(scissor->miny, scissor->maxy, - scissor->minx, scissor->maxx); - SwrSetScissorRects(ctx->swrContext, 1, &bbox); + SwrSetScissorRects(ctx->swrContext, 1, &ctx->swr_scissor); } /* Viewport */ -- 2.7.4