swr/rast: Pull of RTAI gather & offset out of clip/bin code
author: Tim Rowley <timothy.o.rowley@intel.com>
Mon, 11 Dec 2017 21:51:46 +0000 (15:51 -0600)
committer: Tim Rowley <timothy.o.rowley@intel.com>
Fri, 15 Dec 2017 16:56:40 +0000 (10:56 -0600)
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/rasterizer/core/binner.cpp
src/gallium/drivers/swr/rasterizer/core/clip.cpp
src/gallium/drivers/swr/rasterizer/core/clip.h
src/gallium/drivers/swr/rasterizer/core/context.h
src/gallium/drivers/swr/rasterizer/core/frontend.cpp
src/gallium/drivers/swr/rasterizer/core/frontend.h
src/gallium/drivers/swr/rasterizer/core/pa.h

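diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
index a664ed8..7ef87c4 100644
--- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp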
@@ -45,7 +45,8 @@ void BinPostSetupLinesImpl(
     typename SIMD_T::Float recipW[],
     uint32_t primMask,
     typename SIMD_T::Integer const &primID,
-    typename SIMD_T::Integer const &viewportIdx);
+    typename SIMD_T::Integer const &viewportIdx,
+    typename SIMD_T::Integer const &rtIdx);
 
 template <typename SIMD_T, uint32_t SIMD_WIDTH>
 void BinPostSetupPointsImpl(
@@ -55,7 +56,8 @@ void BinPostSetupPointsImpl(
     typename SIMD_T::Vec4 prim[],
     uint32_t primMask,
     typename SIMD_T::Integer const &primID,
-    typename SIMD_T::Integer const &viewportIdx);
+    typename SIMD_T::Integer const &viewportIdx,
+    typename SIMD_T::Integer const &rtIdx);
 
 //////////////////////////////////////////////////////////////////////////
 /// @brief Processes attributes for the backend based on linkage mask and
@@ -308,9 +310,11 @@ void SIMDCALL BinTrianglesImpl(
     typename SIMD_T::Vec4 tri[3],
     uint32_t triMask,
     typename SIMD_T::Integer const &primID,
-    typename SIMD_T::Integer const &viewportIdx)
+    typename SIMD_T::Integer const &viewportIdx,
+    typename SIMD_T::Integer const &rtIdx)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
+    const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx);
 
     AR_BEGIN(FEBinTriangles, pDC->drawId);
 
@@ -604,21 +608,21 @@ endBinTriangles:
         recipW[0] = vRecipW0;
         recipW[1] = vRecipW1;
 
-        BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx);
+        BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
 
         line[0] = tri[1];
         line[1] = tri[2];
         recipW[0] = vRecipW1;
         recipW[1] = vRecipW2;
 
-        BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx);
+        BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
 
         line[0] = tri[2];
         line[1] = tri[0];
         recipW[0] = vRecipW2;
         recipW[1] = vRecipW0;
 
-        BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx);
+        BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
 
         AR_END(FEBinTriangles, 1);
         return;
@@ -626,9 +630,9 @@ endBinTriangles:
     else if (rastState.fillMode == SWR_FILLMODE_POINT)
     {
         // Bin 3 points
-        BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[0], triMask, primID, viewportIdx);
-        BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[1], triMask, primID, viewportIdx);
-        BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx);
+        BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[0], triMask, primID, viewportIdx, rtIdx);
+        BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[1], triMask, primID, viewportIdx, rtIdx);
+        BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx, rtIdx);
 
         AR_END(FEBinTriangles, 1);
         return;
@@ -659,22 +663,6 @@ endBinTriangles:
     TransposeVertices(vHorizZ, tri[0].z, tri[1].z, tri[2].z);
     TransposeVertices(vHorizW, vRecipW0, vRecipW1, vRecipW2);
 
-    // store render target array index
-    OSALIGNSIMD16(uint32_t) aRTAI[SIMD_WIDTH];
-    if (state.backendState.readRenderTargetArrayIndex)
-    {
-        typename SIMD_T::Vec4 vRtai[3];
-        pa.Assemble(VERTEX_SGV_SLOT, vRtai);
-        typename SIMD_T::Integer vRtaii;
-        vRtaii = SIMD_T::castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]);
-        SIMD_T::store_si(reinterpret_cast<typename SIMD_T::Integer *>(aRTAI), vRtaii);
-    }
-    else
-    {
-        SIMD_T::store_si(reinterpret_cast<typename SIMD_T::Integer *>(aRTAI), SIMD_T::setzero_si());
-    }
-
-
     // scan remaining valid triangles and bin each separately
     while (_BitScanForward(&triIndex, triMask))
     {
@@ -763,9 +751,10 @@ void BinTriangles(
     simdvector tri[3],
     uint32_t triMask,
     simdscalari const &primID,
-    simdscalari const &viewportIdx)
+    simdscalari const &viewportIdx,
+    simdscalari const &rtIdx)
 {
-    BinTrianglesImpl<SIMD256, KNOB_SIMD_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx);
+    BinTrianglesImpl<SIMD256, KNOB_SIMD_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx, rtIdx);
 }
 
 #if USE_SIMD16_FRONTEND
@@ -777,9 +766,10 @@ void SIMDCALL BinTriangles_simd16(
     simd16vector tri[3],
     uint32_t triMask,
     simd16scalari const &primID,
-    simd16scalari const &viewportIdx)
+    simd16scalari const &viewportIdx,
+    simd16scalari const &rtIdx)
 {
-    BinTrianglesImpl<SIMD512, KNOB_SIMD16_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx);
+    BinTrianglesImpl<SIMD512, KNOB_SIMD16_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx, rtIdx);
 }
 
 #endif
@@ -828,7 +818,8 @@ void BinPostSetupPointsImpl(
     typename SIMD_T::Vec4 prim[],
     uint32_t primMask,
     typename SIMD_T::Integer const &primID,
-    typename SIMD_T::Integer const &viewportIdx)
+    typename SIMD_T::Integer const &viewportIdx,
+    typename SIMD_T::Integer const &rtIdx)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
 
@@ -896,19 +887,8 @@ void BinPostSetupPointsImpl(
         SIMD_T::store_ps(reinterpret_cast<float *>(aZ), primVerts.z);
 
         // store render target array index
-        OSALIGNSIMD16(uint32_t) aRTAI[SIMD_WIDTH];
-        if (state.backendState.readRenderTargetArrayIndex)
-        {
-            typename SIMD_T::Vec4 vRtai;
-            pa.Assemble(VERTEX_SGV_SLOT, &vRtai);
-            typename SIMD_T::Integer vRtaii = SIMD_T::castps_si(vRtai[VERTEX_SGV_RTAI_COMP]);
-            SIMD_T::store_si(reinterpret_cast<typename SIMD_T::Integer *>(aRTAI), vRtaii);
-        }
-        else
-        {
-            SIMD_T::store_si(reinterpret_cast<typename SIMD_T::Integer *>(aRTAI), SIMD_T::setzero_si());
-        }
-
+        const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx);
+        
         uint32_t *pPrimID = (uint32_t *)&primID;
         DWORD primIndex = 0;
 
@@ -1155,7 +1135,8 @@ void BinPointsImpl(
     typename SIMD_T::Vec4 prim[3],
     uint32_t primMask,
     typename SIMD_T::Integer const &primID,
-    typename SIMD_T::Integer const &viewportIdx)
+    typename SIMD_T::Integer const &viewportIdx,
+    typename SIMD_T::Integer const &rtIdx)
 {
     const API_STATE& state = GetApiState(pDC);
     const SWR_FRONTEND_STATE& feState = state.frontendState;
@@ -1193,7 +1174,8 @@ void BinPointsImpl(
         prim,
         primMask,
         primID,
-        viewportIdx);
+        viewportIdx,
+        rtIdx);
 }
 
 void BinPoints(
@@ -1203,7 +1185,8 @@ void BinPoints(
     simdvector prim[3],
     uint32_t primMask,
     simdscalari const &primID,
-    simdscalari const &viewportIdx)
+    simdscalari const &viewportIdx,
+    simdscalari const &rtIdx)
 {
     BinPointsImpl<SIMD256, KNOB_SIMD_WIDTH>(
         pDC,
@@ -1212,7 +1195,8 @@ void BinPoints(
         prim,
         primMask,
         primID,
-        viewportIdx);
+        viewportIdx,
+        rtIdx);
 }
 
 #if USE_SIMD16_FRONTEND
@@ -1223,7 +1207,8 @@ void SIMDCALL BinPoints_simd16(
     simd16vector prim[3],
     uint32_t primMask,
     simd16scalari const &primID,
-    simd16scalari const &viewportIdx)
+    simd16scalari const &viewportIdx,
+    simd16scalari const & rtIdx)
 {
     BinPointsImpl<SIMD512, KNOB_SIMD16_WIDTH>(
         pDC,
@@ -1232,7 +1217,8 @@ void SIMDCALL BinPoints_simd16(
         prim,
         primMask,
         primID,
-        viewportIdx);
+        viewportIdx,
+        rtIdx);
 }
 
 #endif
@@ -1253,9 +1239,11 @@ void BinPostSetupLinesImpl(
     typename SIMD_T::Float recipW[],
     uint32_t primMask,
     typename SIMD_T::Integer const &primID,
-    typename SIMD_T::Integer const &viewportIdx)
+    typename SIMD_T::Integer const &viewportIdx,
+    typename SIMD_T::Integer const &rtIdx)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
+    const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx);
 
     AR_BEGIN(FEBinLines, pDC->drawId);
 
@@ -1376,20 +1364,6 @@ void BinPostSetupLinesImpl(
     TransposeVertices(vHorizZ, prim[0].z, prim[1].z, SIMD_T::setzero_ps());
     TransposeVertices(vHorizW, vRecipW0,  vRecipW1,  SIMD_T::setzero_ps());
 
-    // store render target array index
-    OSALIGNSIMD16(uint32_t) aRTAI[SIMD_WIDTH];
-    if (state.backendState.readRenderTargetArrayIndex)
-    {
-        typename SIMD_T::Vec4 vRtai[2];
-        pa.Assemble(VERTEX_SGV_SLOT, vRtai);
-        typename SIMD_T::Integer vRtaii = SIMD_T::castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]);
-        SIMD_T::store_si(reinterpret_cast<typename SIMD_T::Integer *>(aRTAI), vRtaii);
-    }
-    else
-    {
-        SIMD_T::store_si(reinterpret_cast<typename SIMD_T::Integer *>(aRTAI), SIMD_T::setzero_si());
-    }
-
     // scan remaining valid prims and bin each separately
     DWORD primIndex;
     while (_BitScanForward(&primIndex, primMask))
@@ -1471,7 +1445,8 @@ void SIMDCALL BinLinesImpl(
     typename SIMD_T::Vec4 prim[3],
     uint32_t primMask,
     typename SIMD_T::Integer const &primID,
-    typename SIMD_T::Integer const &viewportIdx)
+    typename SIMD_T::Integer const &viewportIdx,
+    typename SIMD_T::Integer const & rtIdx)
 {
     const API_STATE& state = GetApiState(pDC);
     const SWR_RASTSTATE& rastState = state.rastState;
@@ -1522,7 +1497,8 @@ void SIMDCALL BinLinesImpl(
         vRecipW,
         primMask,
         primID,
-        viewportIdx);
+        viewportIdx,
+        rtIdx);
 }
 
 void BinLines(
@@ -1532,9 +1508,10 @@ void BinLines(
     simdvector prim[],
     uint32_t primMask,
     simdscalari const &primID,
-    simdscalari const &viewportIdx)
+    simdscalari const &viewportIdx,
+    simdscalari const &rtIdx)
 {
-    BinLinesImpl<SIMD256, KNOB_SIMD_WIDTH>(pDC, pa, workerId, prim, primMask, primID, viewportIdx);
+    BinLinesImpl<SIMD256, KNOB_SIMD_WIDTH>(pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
 }
 
 #if USE_SIMD16_FRONTEND
@@ -1545,9 +1522,10 @@ void SIMDCALL BinLines_simd16(
     simd16vector prim[3],
     uint32_t primMask,
     simd16scalari const &primID,
-    simd16scalari const &viewportIdx)
+    simd16scalari const &viewportIdx,
+    simd16scalari const &rtIdx)
 {
-    BinLinesImpl<SIMD512, KNOB_SIMD16_WIDTH>(pDC, pa, workerId, prim, primMask, primID, viewportIdx);
+    BinLinesImpl<SIMD512, KNOB_SIMD16_WIDTH>(pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
 }
 
 #endif
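diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
index d4da2c3..7205802 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp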
@@ -160,35 +160,39 @@ int ClipTriToPlane( const float *pInPts, int numInPts,
     return i;
 }
 
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx)
+void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
+                   simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
     AR_BEGIN(FEClipTriangles, pDC->drawId);
     Clipper<SIMD256, 3> clipper(workerId, pDC);
-    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
     AR_END(FEClipTriangles, 1);
 }
 
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx)
+void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
+               simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
     AR_BEGIN(FEClipLines, pDC->drawId);
     Clipper<SIMD256, 2> clipper(workerId, pDC);
-    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
     AR_END(FEClipLines, 1);
 }
 
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx)
+void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
+                simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
     AR_BEGIN(FEClipPoints, pDC->drawId);
     Clipper<SIMD256, 1> clipper(workerId, pDC);
-    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
     AR_END(FEClipPoints, 1);
 }
 
 #if USE_SIMD16_FRONTEND
-void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx)
+void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
+                                   simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
     AR_BEGIN(FEClipTriangles, pDC->drawId);
@@ -198,12 +202,13 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t wor
     Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
 
     pa.useAlternateOffset = false;
-    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
 
     AR_END(FEClipTriangles, 1);
 }
 
-void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx)
+void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
+                               simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
     AR_BEGIN(FEClipLines, pDC->drawId);
@@ -213,12 +218,13 @@ void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerI
     Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
 
     pa.useAlternateOffset = false;
-    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
 
     AR_END(FEClipLines, 1);
 }
 
-void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx)
+void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
+                                simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
 {
     SWR_CONTEXT *pContext = pDC->pContext;
     AR_BEGIN(FEClipPoints, pDC->drawId);
@@ -228,7 +234,7 @@ void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t worker
     Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
 
     pa.useAlternateOffset = false;
-    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
+    clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
 
     AR_END(FEClipPoints, 1);
 }
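diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index 8b94766..e5e00d4 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h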
@@ -178,11 +178,11 @@ struct BinnerChooser<SIMD256>
         };
     }
 
-    void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD256::Vec4 prims[], uint32_t primMask, SIMD256::Integer const &primID, SIMD256::Integer &viewportIdx)
+    void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD256::Vec4 prims[], uint32_t primMask, SIMD256::Integer const &primID, SIMD256::Integer &viewportIdx, SIMD256::Integer &rtIdx)
     {
         SWR_ASSERT(pfnBinFunc != nullptr);
 
-        pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx);
+        pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx, rtIdx);
     }
 };
 
@@ -231,11 +231,11 @@ struct BinnerChooser<SIMD512>
         };
     }
 
-    void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD512::Vec4 prims[], uint32_t primMask, SIMD512::Integer const &primID, SIMD512::Integer &viewportIdx)
+    void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD512::Vec4 prims[], uint32_t primMask, SIMD512::Integer const &primID, SIMD512::Integer &viewportIdx, SIMD512::Integer &rtIdx)
     {
         SWR_ASSERT(pfnBinFunc != nullptr);
 
-        pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx);
+        pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx, rtIdx);
     }
 };
 
@@ -437,7 +437,8 @@ public:
         return SIMD_T::movemask_ps(vClipCullMask);
     }
 
-    void ClipSimd(const typename SIMD_T::Vec4 prim[], const typename SIMD_T::Float &vPrimMask, const typename SIMD_T::Float &vClipMask, PA_STATE &pa, const typename SIMD_T::Integer &vPrimId, const typename SIMD_T::Integer &vViewportIdx)
+    void ClipSimd(const typename SIMD_T::Vec4 prim[], const typename SIMD_T::Float &vPrimMask, const typename SIMD_T::Float &vClipMask, PA_STATE &pa,
+                  const typename SIMD_T::Integer &vPrimId, const typename SIMD_T::Integer &vViewportIdx, const typename SIMD_T::Integer &vRtIdx)
     {
         // input/output vertex store for clipper
         SIMDVERTEX_T<SIMD_T> vertices[7]; // maximum 7 verts generated per triangle
@@ -538,6 +539,7 @@ public:
         const uint32_t *pVertexCount = reinterpret_cast<const uint32_t *>(&vNumClippedVerts);
         const uint32_t *pPrimitiveId = reinterpret_cast<const uint32_t *>(&vPrimId);
         const uint32_t *pViewportIdx = reinterpret_cast<const uint32_t *>(&vViewportIdx);
+        const uint32_t *pRtIdx = reinterpret_cast<const uint32_t *>(&vRtIdx);
 
         const SIMD256::Integer vOffsets = SIMD256::set_epi32(
             0 * sizeof(SIMDVERTEX_T<SIMD_T>), // unused lane
@@ -651,6 +653,8 @@ public:
 
             const typename SIMD_T::Integer primID = SIMD_T::set1_epi32(pPrimitiveId[inputPrim]);
             const typename SIMD_T::Integer viewportIdx = SIMD_T::set1_epi32(pViewportIdx[inputPrim]);
+            const typename SIMD_T::Integer rtIdx = SIMD_T::set1_epi32(pRtIdx[inputPrim]);
+
 
             while (clipPA.GetNextStreamOutput())
             {
@@ -662,7 +666,7 @@ public:
 
                     if (assemble)
                     {
-                        binner.pfnBinFunc(pDC, clipPA, workerId, attrib, primMask, primID, viewportIdx);
+                        binner.pfnBinFunc(pDC, clipPA, workerId, attrib, primMask, primID, viewportIdx, rtIdx);
                     }
 
                 } while (clipPA.NextPrim());
@@ -677,7 +681,8 @@ public:
         UPDATE_STAT_FE(CPrimitives, numClippedPrims);
     }
 
-    void ExecuteStage(PA_STATE &pa, typename SIMD_T::Vec4 prim[], uint32_t primMask, typename SIMD_T::Integer const &primId, typename SIMD_T::Integer const &viewportIdx)
+    void ExecuteStage(PA_STATE &pa, typename SIMD_T::Vec4 prim[], uint32_t primMask,
+                      typename SIMD_T::Integer const &primId, typename SIMD_T::Integer const &viewportIdx, typename SIMD_T::Integer const &rtIdx)
     {
         SWR_ASSERT(pa.pDC != nullptr);
 
@@ -716,7 +721,7 @@ public:
             AR_BEGIN(FEGuardbandClip, pa.pDC->drawId);
             // we have to clip tris, execute the clipper, which will also
             // call the binner
-            ClipSimd(prim, SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId, viewportIdx);
+            ClipSimd(prim, SIMD_T::vmask_ps(primMask), SIMD_T::vmask_ps(clipMask), pa, primId, viewportIdx, rtIdx);
             AR_END(FEGuardbandClip, 1);
         }
         else if (validMask)
@@ -725,7 +730,7 @@ public:
             UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
 
             // forward valid prims directly to binner
-            binner.pfnBinFunc(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx);
+            binner.pfnBinFunc(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx, rtIdx);
         }
     }
 
@@ -1135,12 +1140,12 @@ private:
 
 
 // pipeline stage functions
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx);
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx);
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx);
+void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
+void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
+void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
 #if USE_SIMD16_FRONTEND
-void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx);
-void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx);
-void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx);
+void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
+void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
+void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
 #endif
 
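diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index ef67193..cba8de9 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h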
@@ -214,12 +214,12 @@ struct PA_STATE;
 
 // function signature for pipeline stages that execute after primitive assembly
 typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], 
-    uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx);
+    uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx, simdscalari const &rtIdx);
 
 #if ENABLE_AVX512_SIMD16
 // function signature for pipeline stages that execute after primitive assembly
 typedef void(SIMDCALL *PFN_PROCESS_PRIMS_SIMD16)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[],
-    uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx);
+    uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
 
 #endif
 OSALIGNLINE(struct) API_STATE
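diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 3de79d6..ed8ce15 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp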
@@ -989,15 +989,27 @@ static void GeometryShaderStage(
 #if USE_SIMD16_FRONTEND
                                 simd16scalari vPrimId = _simd16_set1_epi32(pPrimitiveId[inputPrim]);
 
-                                // Gather the VPAI from the SVG if provided.
-                                SIMD16::Vec4 vpiAttrib[3];
-                                SIMD16::Integer vViewportIdx = SIMD16::setzero_si();
+                                // Gather data from the SGV if provided.
+                                simd16scalari vViewportIdx = SIMD16::setzero_si();
+                                simd16scalari vRtIdx = SIMD16::setzero_si();
+                                SIMD16::Vec4 svgAttrib[4];
+
+                                if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+                                {
+                                    gsPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
+                                }
+
+
                                 if (state.backendState.readViewportArrayIndex)
                                 {
-                                    gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
-                                    vViewportIdx = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+                                    vViewportIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
                                     gsPa.viewportArrayActive = true;
                                 }
+                                if (state.backendState.readRenderTargetArrayIndex)
+                                {
+                                    vRtIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+                                    gsPa.rtArrayActive = true;
+                                }
 
                                 {
                                     // OOB VPAI indices => forced to zero.
@@ -1007,29 +1019,40 @@ static void GeometryShaderStage(
                                     vViewportIdx = SIMD16::and_si(vClearMask, vViewportIdx);
 
                                     gsPa.useAlternateOffset = false;
-                                    pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx);
+                                    pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx, vRtIdx);
                                 }
 #else
                                 simdscalari vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]);
 
-                                // Gather the VPAI from the SVG if provided.
-                                SIMD8::Vec4 vpiAttrib[3];
-                                SIMD8::Integer vViewportIdx = SIMD8::setzero_si();
+                                // Gather data from the SGV if provided.
+                                simdscalari vViewportIdx = SIMD8::setzero_si();
+                                simdscalari vRtIdx = SIMD8::setzero_si();
+                                SIMD8::Vec4 svgAttrib[4];
+
+                                if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+                                {
+                                    gsPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
+                                }
+
+                                // Extract the viewport and render-target array indices from the assembled SGV attributes.
                                 if (state.backendState.readViewportArrayIndex)
                                 {
-                                    gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
-                                    vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+                                    vViewportIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
 
                                     // OOB VPAI indices => forced to zero.
                                     vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si());
                                     simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
                                     simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports);
                                     vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx);
-
-                                    gsPa.viewportArrayActive = true;
+                                    gsPa.viewportArrayActive = true;
+                                }
+                                if (state.backendState.readRenderTargetArrayIndex)
+                                {
+                                    vRtIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+                                    gsPa.rtArrayActive = true;
                                 }
 
-                                pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx);
+                                pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx, vRtIdx);
 #endif
                             }
                         }
@@ -1372,46 +1395,68 @@ static void TessellationStages(
 
                     SWR_ASSERT(pfnClipFunc);
 #if USE_SIMD16_FRONTEND
-                    // Gather the VPAI from the SVG if provided.
-                    simd16scalari vpai = SIMD16::setzero_si();
+                    // Gather data from the SVG if provided.
+                    simd16scalari vViewportIdx = SIMD16::setzero_si();
+                    simd16scalari vRtIdx = SIMD16::setzero_si();
+                    SIMD16::Vec4 svgAttrib[4];
+
+                    if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+                    {
+                        tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
+                    }
+
+
                     if (state.backendState.readViewportArrayIndex)
                     {
-                        simd16vector vpiAttrib[4];
-                        tessPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
-                        vpai = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+                        vViewportIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
                         tessPa.viewportArrayActive = true;
                     }
+                    if (state.backendState.readRenderTargetArrayIndex)
+                    {
+                        vRtIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+                        tessPa.rtArrayActive = true;
+                    }
 
 
                     {
                         // OOB VPAI indices => forced to zero.
-                        vpai = SIMD16::max_epi32(vpai, SIMD16::setzero_si());
+                        vViewportIdx = SIMD16::max_epi32(vViewportIdx, SIMD16::setzero_si());
                         simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
-                        simd16scalari vClearMask = SIMD16::cmplt_epi32(vpai, vNumViewports);
-                        vpai = SIMD16::and_si(vClearMask, vpai);
+                        simd16scalari vClearMask = SIMD16::cmplt_epi32(vViewportIdx, vNumViewports);
+                        vViewportIdx = SIMD16::and_si(vClearMask, vViewportIdx);
 
                         tessPa.useAlternateOffset = false;
-                        pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID, vpai);
+                        pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID, vViewportIdx, vRtIdx);
                     }
 #else
-                    // Gather the VPAI from the SVG if provided.
-                    SIMD8::Vec4 vpiAttrib[3];
-                    SIMD8::Integer vViewportIdx = SIMD8::setzero_si();
+                    // Gather data from the SVG if provided.
+                    simdscalari vViewportIdx = SIMD16::setzero_si();
+                    simdscalari vRtIdx = SIMD16::setzero_si();
+                    SIMD8::Vec4 svgAttrib[4];
+
+                    if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+                    {
+                        tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
+                    }
+
                     if (state.backendState.readViewportArrayIndex)
                     {
-                        tessPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
-                        vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+                        vViewportIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
 
                         // OOB VPAI indices => forced to zero.
                         vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si());
                         simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
                         simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports);
                         vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx);
-
                         tessPa.viewportArrayActive = true;
                     }
+                    if (state.backendState.readRenderTargetArrayIndex)
+                    {
+                        vRtIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+                        tessPa.rtArrayActive = true;
+                    }
                     pfnClipFunc(pDC, tessPa, workerId, prim,
-                        GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), vViewportIdx);
+                        GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), vViewportIdx, vRtIdx);
 #endif
                 }
             }
@@ -1803,15 +1848,27 @@ void ProcessDraw(
                                 if (HasRastT::value)
                                 {
                                     SWR_ASSERT(pDC->pState->pfnProcessPrims_simd16);
-                                    // Gather the VPAI from the SVG if provided.
+                                    // Gather the viewport (VPAI) and render target (RTAI) array indices from the SGV slot if provided.
                                     simd16scalari vpai = SIMD16::setzero_si();
+                                    simd16scalari rtai = SIMD16::setzero_si();
+                                    SIMD16::Vec4 svgAttrib[4];
+
+                                    if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+                                    {
+                                        pa.Assemble(VERTEX_SGV_SLOT, svgAttrib);  // assembled once; both index reads below share it
+                                    }
+
+
                                     if (state.backendState.readViewportArrayIndex)
                                     {
-                                        simd16vector vpiAttrib[4];
-                                        pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
-                                        vpai = SIMD16::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+                                        vpai = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
                                         pa.viewportArrayActive = true;
                                     }
+                                    if (state.backendState.readRenderTargetArrayIndex)
+                                    {
+                                        rtai = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+                                        pa.rtArrayActive = true;
+                                    }
 
                                     {
                                         // OOB VPAI indices => forced to zero.
@@ -1821,7 +1878,7 @@ void ProcessDraw(
                                         vpai = SIMD16::and_si(vClearMask, vpai);
 
                                         pa.useAlternateOffset = false;
-                                        pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID, vpai);
+                                        pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID, vpai, rtai);
                                     }
                                 }
                             }
@@ -1983,25 +2040,35 @@ void ProcessDraw(
                                 {
                                     SWR_ASSERT(pDC->pState->pfnProcessPrims);
 
-                                    // Gather the VPAI from the SVG if provided.
-                                    SIMD8::Vec4 vpiAttrib[3];
-                                    SIMD8::Integer vViewportIdx = SIMD8::setzero_si();
+                                    // Gather data from the SVG if provided.
+                                    simdscalari vViewportIdx = SIMD16::setzero_si();
+                                    simdscalari vRtIdx = SIMD16::setzero_si();
+                                    SIMD8::Vec4 svgAttrib[4];
+
+                                    if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+                                    {
+                                        tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
+                                    }
+
                                     if (state.backendState.readViewportArrayIndex)
                                     {
-                                        pa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
-                                        vViewportIdx = SIMD8::castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
+                                        vViewportIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
 
                                         // OOB VPAI indices => forced to zero.
                                         vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si());
                                         simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
                                         simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports);
                                         vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx);
-
-                                        pa.viewportArrayActive = true;
+                                        tessPa.viewportArrayActive = true;
+                                    }
+                                    if (state.backendState.readRenderTargetArrayIndex)
+                                    {
+                                        vRtIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+                                        tessPa.rtArrayActive = true;
                                     }
 
                                     pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim,
-                                        GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), vViewportIdx);
+                                        GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), vViewportIdx, vRtIdx);
                                 }
                             }
                         }
index e2ca127..6a2ec84 100644 (file)
@@ -389,10 +389,10 @@ PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative);
 #endif
 
 struct PA_STATE_BASE;  // forward decl
-void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx);
-void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx);
+void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx, simdscalari const &rtIdx);
+void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx, simdscalari const &rtIdx);
 #if USE_SIMD16_FRONTEND
-void SIMDCALL BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx);
-void SIMDCALL BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx);
+void SIMDCALL BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
+void SIMDCALL BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
 #endif
 
index a0160d4..c88b4bf 100644 (file)
@@ -80,6 +80,7 @@ struct PA_STATE
 #endif
 
     bool viewportArrayActive{ false };
+    bool rtArrayActive { false };
     uint32_t numVertsPerPrim{ 0 };
 
     PA_STATE(){}