std::string* pOptStdErr = nullptr, ///< (Optional Out) Standard Error text
const std::string* pOptStdIn = nullptr); ///< (Optional In) Standard Input text
+
+/// Switch the vector control/status register to round-to-nearest with flush-to-zero and denormals-are-zero enabled.
+/// @returns the CSR value that was active before the change, suitable for passing to RestoreVectorCSR()
+static INLINE uint32_t SetOptimalVectorCSR()
+{
+ const uint32_t savedCSR = _mm_getcsr();
+ // Fields rewritten by this helper; every other CSR bit is carried over from the saved value.
+ const uint32_t managedBits = _MM_ROUND_MASK | _MM_DENORMALS_ZERO_MASK | _MM_FLUSH_ZERO_MASK;
+ const uint32_t desiredBits = _MM_ROUND_NEAREST | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON;
+
+ _mm_setcsr((savedCSR & ~managedBits) | desiredBits);
+ return savedCSR;
+}
+
+/// Write a previously captured value back into the vector control/status register via _mm_setcsr().
+/// @param csrState - CSR value to install; should be the value returned from SetOptimalVectorCSR()
+static INLINE void RestoreVectorCSR(uint32_t csrState)
+{
+ _mm_setcsr(csrState);
+}
+
#endif //__SWR_OS_H__
if (pContext->threadInfo.SINGLE_THREADED)
{
- // flush denormals to 0
- uint32_t mxcsr = _mm_getcsr();
- _mm_setcsr(mxcsr | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON);
+ uint32_t mxcsr = SetOptimalVectorCSR();
if (IsDraw)
{
}
// restore csr
- _mm_setcsr(mxcsr);
+ RestoreVectorCSR(mxcsr);
}
else
{
{
vIndex = _simd16_add_epi32(_simd16_set1_epi32(work.startVertexID), vScale);
- fetchInfo_lo.xpIndices =
- pDC->pContext->pfnMakeGfxPtr(GetPrivateState(pDC), &vIndex);
- fetchInfo_hi.xpIndices =
- pDC->pContext->pfnMakeGfxPtr(GetPrivateState(pDC), &vIndex + KNOB_SIMD_WIDTH * sizeof(int32_t)); // 1/2 of KNOB_SIMD16_WIDTH
+ // Low half of the fetch reads its indices from the start of vIndex.
+ fetchInfo_lo.xpIndices = pDC->pContext->pfnMakeGfxPtr(GetPrivateState(pDC), &vIndex);
+ // High half starts KNOB_SIMD_WIDTH int32 lanes into vIndex. Step through an int32_t* so the
+ // offset is counted in lanes; arithmetic on &vIndex itself would advance by whole
+ // SIMD16 registers and point far outside the object.
+ fetchInfo_hi.xpIndices = pDC->pContext->pfnMakeGfxPtr(
+ GetPrivateState(pDC),
+ reinterpret_cast<int32_t*>(&vIndex) + KNOB_SIMD_WIDTH); // 1/2 of KNOB_SIMD16_WIDTH
}
fetchInfo_lo.CurInstance = instanceNum;
for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
{
stats.DepthPassCount += dynState.pStats[i].DepthPassCount;
-
stats.PsInvocations += dynState.pStats[i].PsInvocations;
stats.CsInvocations += dynState.pStats[i].CsInvocations;
+
}
pDC->retireCallback.pfnCallbackFunc(pDC->retireCallback.userData,
pDC->retireCallback.userData2,
pDC->retireCallback.userData3);
+
+ // Callbacks to external code *could* change floating point control state
+ // Reset our optimal flags
+ SetOptimalVectorCSR();
}
}
uint32_t numaNode = pThreadData->numaId - pContext->threadInfo.BASE_NUMA_NODE;
uint32_t numaMask = pContext->threadPool.numaMask;
- // flush denormals to 0
- _mm_setcsr(_mm_getcsr() | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON);
+ SetOptimalVectorCSR();
// Track tiles locked by other threads. If we try to lock a macrotile and find it's already
// locked then we'll add it to this list so that we don't try to lock it again.