#define _simd16_add_epi8 SIMD16::add_epi8
#define _simd16_shuffle_epi8 SIMD16::shuffle_epi8
-#define _simd16_i32gather_ps(m, index, scale) SIMD16::i32gather_ps<SIMD16::ScaleFactor(scale)>(index, m)
+#define _simd16_i32gather_ps(m, index, scale) SIMD16::i32gather_ps<SIMD16::ScaleFactor(scale)>(m, index) // forwards (m, index) in macro parameter order; the masked variant below likewise passes m before index
#define _simd16_mask_i32gather_ps(a, m, index, mask, scale) SIMD16::mask_i32gather_ps<SIMD16::ScaleFactor(scale)>(a, m, index, mask)
#define _simd16_abs_epi32 SIMD16::abs_epi32
#define _simd16_int2mask(mask) simd16mask(mask)
#define _simd16_mask2int(mask) int(mask)
+// convert bitmask to vector mask: each of the low 16 bits of 'mask' is tested against its own single-bit constant and the per-lane compare result is returned as a simd16scalar
+SIMDINLINE simd16scalar vMask16(int32_t mask)
+{
+ simd16scalari temp = _simd16_set1_epi32(mask); // presumably replicates 'mask' into every 32-bit lane (set1 semantics) - confirm against the SIMD16 wrapper
+
+ simd16scalari bits = _simd16_set_epi32(0x8000, 0x4000, 0x2000, 0x1000, 0x0800, 0x0400, 0x0200, 0x0100, 0x0080, 0x0040, 0x0020, 0x0010, 0x0008, 0x0004, 0x0002, 0x0001); // sixteen single-bit constants covering bits 0..15; NOTE(review): which lane receives which constant depends on the argument convention of _simd16_set_epi32 - confirm
+
+ simd16scalari result = _simd16_cmplt_epi32(_simd16_setzero_si(), _simd16_and_si(temp, bits)); // per-lane test 0 < (temp & bits), i.e. the lane's bit is set in 'mask'; presumably yields all-ones for true lanes - confirm
+
+ return _simd16_castsi_ps(result); // hands the integer compare result back as simd16scalar; by its name this is a reinterpreting cast, not a numeric conversion - confirm
+}
+
#endif//ENABLE_AVX512_SIMD16
#endif//__SWR_SIMD16INTRIN_H_
{
__mmask16 k = _mm512_cmpneq_ps_mask(mask, setzero_ps());
- return _mm512_mask_i32gather_ps(old, k, idx, p, ScaleT);
+ return _mm512_mask_i32gather_ps(old, k, idx, p, static_cast<int>(ScaleT));
}
static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src)
(SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, EdgeValToEdgeState(ALL_EDGES_VALID), (state.scissorsTileAligned == false));
}
+ simd16BBox bbox;
+
if (!triMask)
{
goto endBinTriangles;
}
// Calc bounding box of triangles
- simd16BBox bbox;
calcBoundingBoxIntVertical<CT>(tri, vXi, vYi, bbox);
// determine if triangle falls between pixel centers and discard
// OOB indices => forced to zero.
simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
- vpai = _simd16_max_epi32(_simd16_setzero_si(), vpai)
+ vpai = _simd16_max_epi32(_simd16_setzero_si(), vpai);
simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
viewportIdx = _simd16_and_si(vClearMask, vpai);
const simdscalar unused = _simd_setzero_ps();
+ // transpose verts needed for backend
+ /// @todo modify BE to take non-transformed verts
+ simd4scalar vHorizX[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH
+ simd4scalar vHorizY[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH
+ simd4scalar vHorizZ[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH
+ simd4scalar vHorizW[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH
+
if (!primMask)
{
goto endBinLines;
_simd16_store_si(reinterpret_cast<simd16scalari *>(aMTTop), bbox.ymin);
_simd16_store_si(reinterpret_cast<simd16scalari *>(aMTBottom), bbox.ymax);
- // transpose verts needed for backend
- /// @todo modify BE to take non-transformed verts
- simd4scalar vHorizX[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH
- simd4scalar vHorizY[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH
- simd4scalar vHorizZ[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH
- simd4scalar vHorizW[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH
-
vTranspose3x8(vHorizX[0], _simd16_extract_ps(prim[0].x, 0), _simd16_extract_ps(prim[1].x, 0), unused);
vTranspose3x8(vHorizY[0], _simd16_extract_ps(prim[0].y, 0), _simd16_extract_ps(prim[1].y, 0), unused);
vTranspose3x8(vHorizZ[0], _simd16_extract_ps(prim[0].z, 0), _simd16_extract_ps(prim[1].z, 0), unused);
AR_BEGIN(FEGuardbandClip, pa.pDC->drawId);
// we have to clip tris, execute the clipper, which will also
// call the binner
- ClipSimd(_simd16_vmask_ps(primMask), _simd16_vmask_ps(clipMask), pa, primId);
+ ClipSimd(vMask(primMask), vMask(clipMask), pa, primId);
AR_END(FEGuardbandClip, 1);
}
else if (validMask)