swr: [rasterizer core] implement depth bounds test
authorTim Rowley <timothy.o.rowley@intel.com>
Fri, 7 Oct 2016 02:06:59 +0000 (21:06 -0500)
committerTim Rowley <timothy.o.rowley@intel.com>
Tue, 11 Oct 2016 16:22:04 +0000 (11:22 -0500)
Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
src/gallium/drivers/swr/rasterizer/core/api.cpp
src/gallium/drivers/swr/rasterizer/core/api.h
src/gallium/drivers/swr/rasterizer/core/backend.cpp
src/gallium/drivers/swr/rasterizer/core/backend.h
src/gallium/drivers/swr/rasterizer/core/context.h
src/gallium/drivers/swr/rasterizer/core/state.h

index a90d993..cb0098d 100644 (file)
@@ -422,6 +422,10 @@ void SetupDefaultState(SWR_CONTEXT *pContext)
 
     pState->rastState.cullMode = SWR_CULLMODE_NONE;
     pState->rastState.frontWinding = SWR_FRONTWINDING_CCW;
+
+    pState->depthBoundsState.depthBoundsTestEnable = false;
+    pState->depthBoundsState.depthBoundsTestMinValue = 0.0f;
+    pState->depthBoundsState.depthBoundsTestMaxValue = 1.0f;
 }
 
 void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC pfnFunc, uint64_t userData, uint64_t userData2, uint64_t userData3)
@@ -628,6 +632,15 @@ void SwrSetBackendState(
     pState->backendState = *pBEState;
 }
 
+void SwrSetDepthBoundsState(
+    HANDLE hContext,
+    SWR_DEPTH_BOUNDS_STATE *pDBState)
+{
+    API_STATE* pState = GetDrawState(GetContext(hContext));
+
+    pState->depthBoundsState = *pDBState;
+}
+
 void SwrSetPixelShaderState(
     HANDLE hContext,
     SWR_PS_STATE *pPSState)
index 93337e6..8ad40ea 100644 (file)
@@ -413,6 +413,14 @@ void SWR_API SwrSetBackendState(
     SWR_BACKEND_STATE *pState);
 
 //////////////////////////////////////////////////////////////////////////
+/// @brief Set depth bounds state
+/// @param hContext - Handle passed back from SwrCreateContext
+/// @param pState - Pointer to state.
+void SWR_API SwrSetDepthBoundsState(
+    HANDLE hContext,
+    SWR_DEPTH_BOUNDS_STATE *pState);
+
+//////////////////////////////////////////////////////////////////////////
 /// @brief Set pixel shader state
 /// @param hContext - Handle passed back from SwrCreateContext
 /// @param pState - Pointer to state.
index 888af79..ad9f0d3 100644 (file)
@@ -501,10 +501,22 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
 
                 simdmask clipCoverageMask = coverageMask & MASK;
                 // interpolate user clip distance if available
-                if(rastState.clipDistanceMask)
+                if (rastState.clipDistanceMask)
                 {
                     clipCoverageMask &= ~ComputeUserClipMask(rastState.clipDistanceMask, work.pUserClipBuffer,
-                                                             psContext.vI.center, psContext.vJ.center);
+                        psContext.vI.center, psContext.vJ.center);
+                }
+
+                if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
+                {
+                    static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
+
+                    const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthBase));
+
+                    const float minz = state.depthBoundsState.depthBoundsTestMinValue;
+                    const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
+
+                    clipCoverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
                 }
 
                 simdscalar vCoverageMask = vMask(clipCoverageMask);
@@ -724,14 +736,26 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
                             psContext.vI.sample, psContext.vJ.sample);
                     }
 
-                    simdscalar vCoverageMask = vMask(coverageMask);
-                    simdscalar depthPassMask = vCoverageMask;
-                    simdscalar stencilPassMask = vCoverageMask;
-
                     // offset depth/stencil buffers current sample
                     uint8_t *pDepthSample = pDepthBase + RasterTileDepthOffset(sample);
                     uint8_t *pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
 
+                    if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
+                    {
+                        static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
+
+                        const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthSample));
+
+                        const float minz = state.depthBoundsState.depthBoundsTestMinValue;
+                        const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
+
+                        coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
+                    }
+
+                    simdscalar vCoverageMask = vMask(coverageMask);
+                    simdscalar depthPassMask = vCoverageMask;
+                    simdscalar stencilPassMask = vCoverageMask;
+
                     // Early-Z?
                     if (T::bCanEarlyZ)
                     {
@@ -802,6 +826,8 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
                 }
                 work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
             }
+
+Endtile:
             AR_BEGIN(BEEndTile, pDC->drawId);
             if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
             {
@@ -1113,13 +1139,25 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
                             psContext.vI.sample, psContext.vJ.sample);
                     }
 
-                    simdscalar vCoverageMask = vMask(coverageMask);
-                    simdscalar stencilPassMask = vCoverageMask;
-
                     // offset depth/stencil buffers current sample
                     uint8_t *pDepthSample = pDepthBase + RasterTileDepthOffset(sample);
                     uint8_t *pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
 
+                    if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
+                    {
+                        static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
+
+                        const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthSample));
+
+                        const float minz = state.depthBoundsState.depthBoundsTestMinValue;
+                        const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
+
+                        coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
+                    }
+
+                    simdscalar vCoverageMask = vMask(coverageMask);
+                    simdscalar stencilPassMask = vCoverageMask;
+
                     AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
                     simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
                         psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
@@ -1131,6 +1169,8 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
                     uint32_t statCount = _mm_popcnt_u32(statMask);
                     UPDATE_STAT(DepthPassCount, statCount);
                 }
+
+Endtile:
                 work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
             }
             pDepthBase += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
index da4f46e..fcc78f7 100644 (file)
@@ -410,6 +410,14 @@ INLINE void CalcCentroidBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CON
     psContext.vOneOverW.centroid = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.centroid, psContext.vJ.centroid);
 }
 
+INLINE simdmask CalcDepthBoundsAcceptMask(simdscalar z, float minz, float maxz)
+{
+    const simdscalar minzMask = _simd_cmpge_ps(z, _simd_set1_ps(minz));
+    const simdscalar maxzMask = _simd_cmple_ps(z, _simd_set1_ps(maxz));
+
+    return _simd_movemask_ps(_simd_and_ps(minzMask, maxzMask));
+}
+
 template<typename T>
 INLINE uint32_t GetNumOMSamples(SWR_MULTISAMPLE_COUNT blendSampleCount)
 {
@@ -490,6 +498,18 @@ struct PixelRateZTestLoop
             uint8_t *pDepthSample = pDepthBase + RasterTileDepthOffset(sample);
             uint8_t * pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
 
+            if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
+            {
+                static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
+
+                const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthSample));
+
+                const float minz = state.depthBoundsState.depthBoundsTestMinValue;
+                const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
+
+                vCoverageMask[sample] = _simd_and_ps(vCoverageMask[sample], vMask(CalcDepthBoundsAcceptMask(z, minz, maxz)));
+            }
+
             // ZTest for this sample
             ///@todo Need to uncomment out this bucket.
             //AR_BEGIN(BEDepthBucket, pDC->drawId);
index 1f56dfc..9a26e33 100644 (file)
@@ -286,6 +286,8 @@ OSALIGNLINE(struct) API_STATE
     // Backend state
     SWR_BACKEND_STATE       backendState;
 
+    SWR_DEPTH_BOUNDS_STATE  depthBoundsState;
+
     // PS - Pixel shader state
     SWR_PS_STATE            psState;
 
index 7efae56..93e4565 100644 (file)
@@ -1079,3 +1079,12 @@ struct SWR_PS_STATE
     uint32_t forceEarlyZ        : 1;    // force execution of early depth/stencil test
 
 };
+
+// depth bounds state
+struct SWR_DEPTH_BOUNDS_STATE
+{
+    bool    depthBoundsTestEnable;
+    float   depthBoundsTestMinValue;
+    float   depthBoundsTestMaxValue;
+};
+