// copied from http://wm.ite.pl/articles/pdep-soft-emu.html
// using bsf instead of funky loop
- DWORD maskIndex;
+ unsigned long maskIndex = 0;
while (_BitScanForward(&maskIndex, mask))
{
// 1. isolate lowest set bit of mask
return _pext_u32(a, mask);
#else
UINT result = 0;
- DWORD maskIndex;
+ unsigned long maskIndex;
uint32_t currentBit = 0;
while (_BitScanForward(&maskIndex, mask))
{
#endif
#endif
-inline unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask)
+// Stores the bit index of the lowest set bit of the 64-bit Mask in *Index.
+// Returns nonzero when Mask has a set bit; returns 0 and leaves *Index
+// untouched when Mask is zero.
+inline unsigned char _BitScanForward64(unsigned long* Index, uint64_t Mask)
{
- *Index = __builtin_ctz(Mask);
+ // __builtin_ctzll(0) has an undefined result, so never call it with zero.
+ if (Mask == 0) { return 0; }
+ *Index = __builtin_ctzll(Mask);
return (Mask != 0);
}
-inline unsigned char _BitScanForward(unsigned int* Index, unsigned int Mask)
+// Stores the bit index of the lowest set bit of the 32-bit Mask in *Index.
+// Returns nonzero when Mask has a set bit; returns 0 and leaves *Index
+// untouched when Mask is zero.
+inline unsigned char _BitScanForward(unsigned long* Index, uint32_t Mask)
{
+ // __builtin_ctz(0) has an undefined result, so never call it with zero.
+ if (Mask == 0) { return 0; }
*Index = __builtin_ctz(Mask);
return (Mask != 0);
}
-inline unsigned char _BitScanReverse(unsigned long* Index, unsigned long Mask)
+// Stores the bit index of the highest set bit of the 64-bit Mask in *Index.
+// Returns nonzero when Mask has a set bit; returns 0 and leaves *Index
+// untouched when Mask is zero.
+inline unsigned char _BitScanReverse64(unsigned long* Index, uint64_t Mask)
{
- *Index = 63 - __builtin_clz(Mask);
+ // __builtin_clzll(0) has an undefined result, so never call it with zero.
+ if (Mask == 0) { return 0; }
+ *Index = 63 - __builtin_clzll(Mask);
return (Mask != 0);
}
-inline unsigned char _BitScanReverse(unsigned int* Index, unsigned int Mask)
+// Stores the bit index of the highest set bit of the 32-bit Mask in *Index.
+// Returns nonzero when Mask has a set bit; returns 0 and leaves *Index
+// untouched when Mask is zero.
+inline unsigned char _BitScanReverse(unsigned long* Index, uint32_t Mask)
{
+ // __builtin_clz(0) has an undefined result, so never call it with zero.
+ if (Mask == 0) { return 0; }
*Index = 31 - __builtin_clz(Mask);
return (Mask != 0);
}
-#define _BitScanForward64 _BitScanForward
-#define _BitScanReverse64 _BitScanReverse
-
inline void* AlignedMalloc(size_t size, size_t alignment)
{
void* ret;
uint32_t* pOffsets = (uint32_t*)&idx;
Float vResult = old;
float* pResult = (float*)&vResult;
- DWORD index;
+ unsigned long index;
uint32_t umask = movemask_ps(mask);
while (_BitScanForward(&index, umask))
{
uint32_t* pOffsets = (uint32_t*)&idx;
Float vResult = old;
float* pResult = (float*)&vResult;
- DWORD index;
+ unsigned long index = 0;
uint32_t umask = movemask_ps(mask);
while (_BitScanForward(&index, umask))
{
streamMasks |= pState->state.soState.streamMasks[i];
}
- DWORD maxAttrib;
+ unsigned long maxAttrib;
if (_BitScanReverse64(&maxAttrib, streamMasks))
{
pState->state.feNumAttributes =
// Disable hottile for surfaces with no writes
if (psState.pfnPixelShader != nullptr)
{
- DWORD rt;
+ unsigned long rt;
uint32_t rtMask = pState->state.psState.renderTargetMask;
while (_BitScanForward(&rt, rtMask))
{
uint32_t colorHotTileMask,
RenderOutputBuffers& renderBuffers)
{
- DWORD index;
+ unsigned long index;
while (_BitScanForward(&index, colorHotTileMask))
{
assert(index < SWR_NUM_RENDERTARGETS);
simdvector blendSrc;
simdvector blendOut;
- DWORD rt;
+ unsigned long rt;
while (_BitScanForward(&rt, renderTargetMask))
{
renderTargetMask &= ~(1 << rt);
if (useAlternateOffset)
{
- DWORD rt;
+ unsigned long rt;
uint32_t rtMask = state.colorHottileEnable;
while (_BitScanForward(&rt, rtMask))
{
if (useAlternateOffset)
{
- DWORD rt;
+ unsigned long rt;
uint32_t rtMask = state.colorHottileEnable;
while (_BitScanForward(&rt, rtMask))
{
if (useAlternateOffset)
{
- DWORD rt;
+ unsigned long rt;
uint32_t rtMask = state.colorHottileEnable;
while (_BitScanForward(&rt, rtMask))
{
uint32_t mask = backendState.swizzleMap[i].componentOverrideMask;
if (mask)
{
- DWORD comp;
+ unsigned long comp;
while (_BitScanForward(&comp, mask))
{
mask &= ~(1 << comp);
float* pRecipW,
float* pUserClipBuffer)
{
- DWORD clipDist;
+ unsigned long clipDist;
uint32_t clipDistMask = state.clipDistanceMask;
while (_BitScanForward(&clipDist, clipDistMask))
{
TransposeVertices(vHorizW, vRecipW0, vRecipW1, vRecipW2);
// scan remaining valid triangles and bin each separately
- while (_BitScanForward((DWORD*)&triIndex, triMask))
+ while (_BitScanForward((unsigned long*)&triIndex, triMask))
{
uint32_t linkageCount = state.backendState.numAttributes;
uint32_t numScalarAttribs = linkageCount * 4;
const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
// scan remaining valid triangles and bin each separately
- while (_BitScanForward((DWORD*)&primIndex, primMask))
+ while (_BitScanForward((unsigned long*)&primIndex, primMask))
{
uint32_t linkageCount = backendState.numAttributes;
uint32_t numScalarAttribs = linkageCount * 4;
// scan remaining valid prims and bin each separately
const SWR_BACKEND_STATE& backendState = state.backendState;
- uint32_t primIndex;
- while (_BitScanForward((DWORD*)&primIndex, primMask))
+ // The index must be an unsigned long: _BitScanForward stores through an
+ // unsigned long*, which is wider than uint32_t on LP64 targets.
+ unsigned long primIndex;
+ while (_BitScanForward(&primIndex, primMask))
{
uint32_t linkageCount = backendState.numAttributes;
uint32_t numScalarAttribs = linkageCount * 4;
TransposeVertices(vHorizW, vRecipW0, vRecipW1, SIMD_T::setzero_ps());
// scan remaining valid prims and bin each separately
- uint32_t primIndex;
- while (_BitScanForward((DWORD*)&primIndex, primMask))
+ unsigned long primIndex;
+ while (_BitScanForward(&primIndex, primMask))
{
uint32_t linkageCount = state.backendState.numAttributes;
uint32_t numScalarAttribs = linkageCount * 4;
pa.Assemble(vertexClipCullOffset, vClipCullDistLo);
pa.Assemble(vertexClipCullOffset + 1, vClipCullDistHi);
- DWORD index;
+ unsigned long index;
while (_BitScanForward(&index, cullMask))
{
cullMask &= ~(1 << index);
const uint32_t* pOffsets = reinterpret_cast<const uint32_t*>(&vOffsets);
const float* pSrc = reinterpret_cast<const float*>(&vSrc);
uint32_t mask = SIMD_T::movemask_ps(vMask);
- DWORD lane;
+ unsigned long lane;
while (_BitScanForward(&lane, mask))
{
mask &= ~(1 << lane);
for (uint32_t primIndex = 0; primIndex < numPrims; ++primIndex)
{
- DWORD slot = 0;
+ unsigned long slot = 0;
uint64_t soMask = soState.streamMasks[streamIndex];
// Write all entries into primitive data buffer for SOS.
// overwrite texcoord for point sprites
uint32_t texCoordMask = backendState.pointSpriteTexCoordMask;
- DWORD texCoordAttrib = 0;
+ unsigned long texCoordAttrib = 0;
while (_BitScanForward(&texCoordAttrib, texCoordMask))
{
if (isPointSpriteTexCoordEnabled)
{
uint32_t texCoordMask = backendState.pointSpriteTexCoordMask;
- DWORD texCoordAttrib = 0;
+ unsigned long texCoordAttrib = 0;
while (_BitScanForward(&texCoordAttrib, texCoordMask))
{
template <typename RT>
INLINE void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers& buffers)
{
- DWORD rt = 0;
+ unsigned long rt = 0;
while (_BitScanForward(&rt, colorHotTileMask))
{
colorHotTileMask &= ~(1 << rt);
RenderOutputBuffers& buffers,
RenderOutputBuffers& startBufferRow)
{
- DWORD rt = 0;
+ unsigned long rt = 0;
while (_BitScanForward(&rt, colorHotTileMask))
{
colorHotTileMask &= ~(1 << rt);
SIMD256::store_ps(src, vSrc);
SIMD256::store_si((SIMD256::Integer*)indices, vIndices);
- DWORD index;
+ unsigned long index;
while (_BitScanForward(&index, mask))
{
mask &= ~(1 << index);
Value* PackMask(uint32_t bitmask)
{
std::vector<Constant*> indices(4, C(0));
- DWORD index;
+ unsigned long index;
uint32_t elem = 0;
while (_BitScanForward(&index, bitmask))
{