typedef pstatus_t (*__set_8u_t)(
BYTE val,
BYTE* pDst,
- INT32 len);
+ UINT32 len);
typedef pstatus_t (*__set_32s_t)(
INT32 val,
INT32* pDst,
- INT32 len);
+ UINT32 len);
typedef pstatus_t (*__set_32u_t)(
UINT32 val,
UINT32* pDst,
- INT32 len);
+ UINT32 len);
typedef pstatus_t (*__zero_t)(
void* pDst,
size_t bytes);
typedef pstatus_t (*__alphaComp_argb_t)(
- const BYTE* pSrc1, INT32 src1Step,
- const BYTE* pSrc2, INT32 src2Step,
- BYTE* pDst, INT32 dstStep,
- INT32 width, INT32 height);
+ const BYTE* pSrc1, UINT32 src1Step,
+ const BYTE* pSrc2, UINT32 src2Step,
+ BYTE* pDst, UINT32 dstStep,
+ UINT32 width, UINT32 height);
typedef pstatus_t (*__add_16s_t)(
const INT16* pSrc1,
const INT16* pSrc2,
INT16* pDst,
- INT32 len);
+ UINT32 len);
typedef pstatus_t (*__lShiftC_16s_t)(
const INT16* pSrc,
- INT32 val,
+ UINT32 val,
INT16* pSrcDst,
- INT32 len);
+ UINT32 len);
typedef pstatus_t (*__lShiftC_16u_t)(
const UINT16* pSrc,
- INT32 val,
+ UINT32 val,
UINT16* pSrcDst,
- INT32 len);
+ UINT32 len);
typedef pstatus_t (*__rShiftC_16s_t)(
const INT16* pSrc,
- INT32 val,
+ UINT32 val,
INT16* pSrcDst,
- INT32 len);
+ UINT32 len);
typedef pstatus_t (*__rShiftC_16u_t)(
const UINT16* pSrc,
- INT32 val,
+ UINT32 val,
UINT16* pSrcDst,
- INT32 len);
+ UINT32 len);
typedef pstatus_t (*__shiftC_16s_t)(
const INT16* pSrc,
INT32 val,
INT16* pSrcDst,
- INT32 len);
+ UINT32 len);
typedef pstatus_t (*__shiftC_16u_t)(
const UINT16* pSrc,
INT32 val,
UINT16* pSrcDst,
- INT32 len);
+ UINT32 len);
typedef pstatus_t (*__sign_16s_t)(
const INT16* pSrc,
INT16* pDst,
- INT32 len);
+ UINT32 len);
typedef pstatus_t (*__yCbCrToRGB_16s8u_P3AC4R_t)(
- const INT16* pSrc[3], INT32 srcStep,
- BYTE* pDst, INT32 dstStep,
+ const INT16* const pSrc[3], UINT32 srcStep,
+ BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
const prim_size_t* roi);
typedef pstatus_t (*__RGBToRGB_16s8u_P3AC4R_t)(
- const INT16* pSrc[3], INT32 srcStep,
- BYTE* pDst, INT32 dstStep, UINT32 DstFormat,
+ const INT16* const pSrc[3], UINT32 srcStep,
+ BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
const prim_size_t* roi);
typedef pstatus_t (*__YCoCgToRGB_8u_AC4R_t)(
const BYTE* pSrc, INT32 srcStep,
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
const prim_size_t* roi);
typedef pstatus_t (*__RGBToYUV420_8u_P3AC4R_t)(
- const BYTE* pSrc, UINT32 srcStep,
+ const BYTE* pSrc, UINT32 SrcFormat, UINT32 srcStep,
BYTE* pDst[3], UINT32 dstStep[3],
const prim_size_t* roi);
typedef pstatus_t (*__RGBToYUV444_8u_P3AC4R_t)(
- const BYTE* pSrc, UINT32 srcStep,
+ const BYTE* pSrc, UINT32 SrcFormat, UINT32 srcStep,
BYTE* pDst[3], UINT32 dstStep[3],
const prim_size_t* roi);
typedef pstatus_t (*__YUV420CombineToYUV444_t)(
roi.width = nSrcWidth;
roi.height = nSrcHeight;
- prims->RGBToYUV420_8u_P3AC4R(pSrcData, nSrcStep, pYUVData, iStride, &roi);
+ prims->RGBToYUV420_8u_P3AC4R(pSrcData, SrcFormat, nSrcStep, pYUVData, iStride, &roi);
status = h264->subsystem->Compress(h264, ppDstData, pDstSize, 0);
/* ------------------------------------------------------------------------- */
static pstatus_t general_YCoCgToRGB_8u_AC4R(
- const BYTE* pSrc, INT32 srcStep,
- BYTE* pDst, UINT32 DstFormat, INT32 dstStep,
- UINT32 width, UINT32 height,
- UINT8 shift,
- BOOL withAlpha)
+ const BYTE* pSrc, INT32 srcStep,
+ BYTE* pDst, UINT32 DstFormat, INT32 dstStep,
+ UINT32 width, UINT32 height,
+ UINT8 shift,
+ BOOL withAlpha)
{
BYTE A;
UINT32 x, y;
R = T + Co;
G = Y + Cg;
B = T - Co;
- color = GetColor(DstFormat, MINMAX(R, 0, 255), MINMAX(G, 0, 255), MINMAX(B, 0,
- 255), A);
+
+ color = GetColor(DstFormat,
+ MINMAX(R, 0, 255), MINMAX(G, 0, 255),
+ MINMAX(B, 0, 255), A);
+ WriteColor(dptr, DstFormat, color);
dptr += GetBytesPerPixel(DstFormat);
}
#ifdef WITH_SSE2
/* ------------------------------------------------------------------------- */
static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert(
- const BYTE* pSrc, INT32 srcStep,
- BYTE* pDst, INT32 dstStep,
+ const BYTE* pSrc, UINT32 srcStep,
+ BYTE* pDst, UINT32 DstFormat, UINT32 dstStep,
UINT32 width, UINT32 height,
UINT8 shift,
BOOL withAlpha)
{
/* Too small, or we'll never hit a 16-byte boundary. Punt. */
return generic->YCoCgToRGB_8u_AC4R(
- pSrc, srcStep, pDst, dstStep,
- width, height, shift, withAlpha, TRUE);
+ pSrc, srcStep, pDst, DstFormat, dstStep,
+ width, height, shift, withAlpha);
}
for (h = 0; h < height; h++)
/* Get to a 16-byte destination boundary. */
if ((ULONG_PTR) dptr & 0x0f)
{
+ pstatus_t status;
int startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4;
if (startup > width) startup = width;
- generic->YCoCgToRGB_8u_AC4R(sptr, srcStep, dptr, dstStep,
- startup, 1, shift, withAlpha, TRUE);
+ status = generic->YCoCgToRGB_8u_AC4R(
+ sptr, srcStep, dptr, DstFormat, dstStep,
+ startup, 1, shift, withAlpha);
+ if (status != PRIMITIVES_SUCCESS)
+ return status;
sptr += startup * sizeof(UINT32);
dptr += startup * sizeof(UINT32);
w -= startup;
/* Handle any remainder pixels. */
if (w > 0)
{
- generic->YCoCgToRGB_8u_AC4R(sptr, srcStep, dptr, dstStep,
- w, 1, shift, withAlpha, TRUE);
+ pstatus_t status;
+ status = generic->YCoCgToRGB_8u_AC4R(
+ sptr, srcStep, dptr, DstFormat, dstStep,
+ w, 1, shift, withAlpha);
+ if (status != PRIMITIVES_SUCCESS)
+ return status;
+
sptr += w * sizeof(UINT32);
dptr += w * sizeof(UINT32);
}
/* ------------------------------------------------------------------------- */
static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
- const BYTE* pSrc, INT32 srcStep,
- BYTE* pDst, INT32 dstStep,
+ const BYTE* pSrc, UINT32 srcStep,
+ BYTE* pDst, UINT32 DstFormat, UINT32 dstStep,
UINT32 width, UINT32 height,
UINT8 shift,
BOOL withAlpha)
{
/* Too small, or we'll never hit a 16-byte boundary. Punt. */
return generic->YCoCgToRGB_8u_AC4R(
- pSrc, srcStep,
- pDst, dstStep, width, height, shift,
- withAlpha, FALSE);
+ pSrc, srcStep, pDst, DstFormat, dstStep,
+ width, height, shift, withAlpha);
}
for (h = 0; h < height; h++)
/* Get to a 16-byte destination boundary. */
if ((ULONG_PTR) dptr & 0x0f)
{
+ pstatus_t status;
int startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4;
if (startup > width) startup = width;
- generic->YCoCgToRGB_8u_AC4R(sptr, srcStep, dptr, dstStep,
- startup, 1, shift, withAlpha, FALSE);
+ status = generic->YCoCgToRGB_8u_AC4R(
+ sptr, srcStep, dptr, DstFormat,
+ dstStep, startup, 1, shift, withAlpha);
+ if (status != PRIMITIVES_SUCCESS)
+ return status;
+
sptr += startup * sizeof(UINT32);
dptr += startup * sizeof(UINT32);
w -= startup;
/* Handle any remainder pixels. */
if (w > 0)
{
- generic->YCoCgToRGB_8u_AC4R(sptr, srcStep, dptr, dstStep,
- w, 1, shift, withAlpha, FALSE);
+ pstatus_t status;
+ status = generic->YCoCgToRGB_8u_AC4R(
+ sptr, srcStep, dptr, DstFormat, dstStep,
+ w, 1, shift, withAlpha);
+ if (status != PRIMITIVES_SUCCESS)
+ return status;
+
sptr += w * sizeof(UINT32);
dptr += w * sizeof(UINT32);
}
/* ------------------------------------------------------------------------- */
static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R(
const BYTE* pSrc, INT32 srcStep,
- BYTE* pDst, INT32 dstStep,
+ BYTE* pDst, UINT32 DstFormat, INT32 dstStep,
UINT32 width, UINT32 height,
UINT8 shift,
- BOOL withAlpha,
- BOOL invert)
+ BOOL withAlpha)
{
- if (invert)
- {
- return ssse3_YCoCgRToRGB_8u_AC4R_invert(pSrc, srcStep, pDst, dstStep,
- width, height, shift, withAlpha);
- }
- else
+ // TODO: Need to implement proper color conversion!!!
+ return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat,
+ dstStep, width, height, shift, withAlpha);
+
+ switch(DstFormat)
{
- return ssse3_YCoCgRToRGB_8u_AC4R_no_invert(pSrc, srcStep, pDst, dstStep,
- width, height, shift, withAlpha);
+ case PIXEL_FORMAT_BGRX32:
+ case PIXEL_FORMAT_BGRA32:
+ return ssse3_YCoCgRToRGB_8u_AC4R_invert(
+ pSrc, srcStep, pDst, DstFormat, dstStep,
+ width, height, shift, withAlpha);
+ case PIXEL_FORMAT_RGBX32:
+ case PIXEL_FORMAT_RGBA32:
+ return ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
+ pSrc, srcStep, pDst, DstFormat, dstStep,
+ width, height, shift, withAlpha);
+ default:
+ return -1;
}
}
#endif /* WITH_SSE2 */
{
/* Filter */
const INT32 u = pSrcU[2 * x] + pSrcU[2 * x + 1] + pSrcU1[2 * x]
- + pSrcU1[2 * x + 1];
+ + pSrcU1[2 * x + 1];
const INT32 v = pSrcV[2 * x] + pSrcV[2 * x + 1] + pSrcV1[2 * x]
- + pSrcV1[2 * x + 1];
+ + pSrcV1[2 * x + 1];
pU[x] = CLIP(u / 4L);
pV[x] = CLIP(v / 4L);
}
const BYTE r = YUV2R(Y, U, V);
const BYTE g = YUV2G(Y, U, V);
const BYTE b = YUV2B(Y, U, V);
- UINT32 color = GetColor(format, r, g, b, 0);
+ UINT32 color = GetColor(format, r, g, b, 0xFF);
WriteColor(dst, format, color);
return dst + GetBytesPerPixel(format);
}
}
static pstatus_t general_RGBToYUV444_8u_P3AC4R(
- const BYTE* pSrc, const UINT32 srcStep,
+ const BYTE* pSrc, UINT32 SrcFormat, const UINT32 srcStep,
BYTE* pDst[3], UINT32 dstStep[3], const prim_size_t* roi)
{
+ const UINT32 bpp = GetBytesPerPixel(SrcFormat);
UINT32 x, y;
UINT32 nWidth, nHeight;
nWidth = roi->width;
for (x = 0; x < nWidth; x++)
{
- const BYTE B = pRGB[4 * x + 0];
- const BYTE G = pRGB[4 * x + 1];
- const BYTE R = pRGB[4 * x + 2];
+ BYTE B, G, R;
+ const UINT32 color = ReadColor(&pRGB[x * bpp], SrcFormat);
+ SplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
+
pY[x] = RGB2Y(R, G, B);
pU[x] = RGB2U(R, G, B);
pV[x] = RGB2V(R, G, B);
}
static pstatus_t general_RGBToYUV420_8u_P3AC4R(
- const BYTE* pSrc, UINT32 srcStep,
+ const BYTE* pSrc, UINT32 SrcFormat, UINT32 srcStep,
BYTE* pDst[3], UINT32 dstStep[3], const prim_size_t* roi)
{
+ const UINT32 bpp = GetBytesPerPixel(SrcFormat);
UINT32 x, y;
UINT32 halfWidth;
UINT32 halfHeight;
for (x = 0; x < halfWidth; x++)
{
- INT32 R, G, B;
+ UINT32 color;
INT32 Ra, Ga, Ba;
const UINT32 val2x = (x * 2);
const UINT32 val2x1 = val2x + 1;
+ BYTE B, G, R;
+
/* 1st pixel */
- Ba = B = pRGB[val2x * 4 + 0];
- Ga = G = pRGB[val2x * 4 + 1];
- Ra = R = pRGB[val2x * 4 + 2];
+ color = ReadColor(&pRGB[val2x * bpp], SrcFormat);
+ SplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
+
+ Ba = B;
+ Ga = G;
+ Ra = R;
pY[val2x] = RGB2Y(R, G, B);
if (val2x1 < nWidth)
{
/* 2nd pixel */
- Ba += B = pRGB[val2x * 4 + 4];
- Ga += G = pRGB[val2x * 4 + 5];
- Ra += R = pRGB[val2x * 4 + 6];
+ color = ReadColor(&pRGB[val2x1 * bpp], SrcFormat);
+ SplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
+ Ba += B;
+ Ga += G;
+ Ra += R;
pY[val2x1] = RGB2Y(R, G, B);
}
if (val2y1 < nHeight)
{
/* 3rd pixel */
- Ba += B = pRGB1[val2x * 4 + 0];
- Ga += G = pRGB1[val2x * 4 + 1];
- Ra += R = pRGB1[val2x * 4 + 2];
+ color = ReadColor(&pRGB1[val2x * bpp], SrcFormat);
+ SplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
+ Ba += B;
+ Ga += G;
+ Ra += R;
pY1[val2x] = RGB2Y(R, G, B);
if (val2x1 < nWidth)
{
/* 4th pixel */
- Ba += B = pRGB1[val2x * 4 + 4];
- Ga += G = pRGB1[val2x * 4 + 5];
- Ra += R = pRGB1[val2x * 4 + 6];
+ color = ReadColor(&pRGB1[val2x1 * bpp], SrcFormat);
+ SplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
+ Ba += B;
+ Ga += G;
+ Ra += R;
pY1[val2x1] = RGB2Y(R, G, B);
}
}
UINT32 i, nWidth, nHeight, VaddDst, VaddY, VaddU, VaddV;
__m128i r0, r1, r2, r3, r4, r5, r6, r7;
__m128i* buffer;
+
+ // TODO: Need to implement proper color conversion!!!!!
+ return generic->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep,
+ DstFormat, roi);
+
/* last_line: if the last (U,V doubled) line should be skipped, set to 10B
* last_column: if it's the last column in a line, set to 10B (for handling line-endings not multiple by four) */
buffer = _aligned_malloc(4 * 16, 16);
const INT16* pSrc1,
const INT16* pSrc2,
INT16* pDst,
- INT32 len)
+ UINT32 len)
{
while (len--)
{
# if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS)
/* ------------------------------------------------------------------------- */
SSE3_SSD_ROUTINE(sse3_add_16s, INT16, generic->add_16s,
- _mm_adds_epi16, generic->add_16s(sptr1++, sptr2++, dptr++, 1))
+ _mm_adds_epi16, generic->add_16s(sptr1++, sptr2++, dptr++, 1))
# endif /* !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) */
#endif
/* ------------------------------------------------------------------------- */
static pstatus_t general_alphaComp_argb(
- const BYTE* pSrc1, INT32 src1Step,
- const BYTE* pSrc2, INT32 src2Step,
- BYTE* pDst, INT32 dstStep,
- INT32 width, INT32 height)
+ const BYTE* pSrc1, UINT32 src1Step,
+ const BYTE* pSrc2, UINT32 src2Step,
+ BYTE* pDst, UINT32 dstStep,
+ UINT32 width, UINT32 height)
{
- const UINT32* sptr1 = (const UINT32*) pSrc1;
- const UINT32* sptr2 = (const UINT32*) pSrc2;
- UINT32* dptr = (UINT32*) pDst;
- int linebytes = width * sizeof(UINT32);
- int src1Jump = (src1Step - linebytes) / sizeof(UINT32);
- int src2Jump = (src2Step - linebytes) / sizeof(UINT32);
- int dstJump = (dstStep - linebytes) / sizeof(UINT32);
- int y;
+ UINT32 y;
for (y = 0; y < height; y++)
{
- int x;
+ const UINT32* sptr1 = (const UINT32*) (pSrc1 + y * src1Step);
+ const UINT32* sptr2 = (const UINT32*) (pSrc2 + y * src2Step);
+ UINT32* dptr = (UINT32*) (pDst + y * dstStep);
+ UINT32 x;
for (x = 0; x < width; x++)
{
*dptr++ = rb | ag;
}
}
-
- sptr1 += src1Jump;
- sptr2 += src2Jump;
- dptr += dstJump;
}
return PRIMITIVES_SUCCESS;
#if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS)
pstatus_t sse2_alphaComp_argb(
- const BYTE* pSrc1, INT32 src1Step,
- const BYTE* pSrc2, INT32 src2Step,
- BYTE* pDst, INT32 dstStep,
- INT32 width, INT32 height)
+ const BYTE* pSrc1, UINT32 src1Step,
+ const BYTE* pSrc2, UINT32 src2Step,
+ BYTE* pDst, UINT32 dstStep,
+ UINT32 width, UINT32 height)
{
const UINT32* sptr1 = (const UINT32*) pSrc1;
const UINT32* sptr2 = (const UINT32*) pSrc2;
if (width < 4) /* pointless if too small */
{
return generic->alphaComp_argb(pSrc1, src1Step, pSrc2, src2Step,
- pDst, dstStep, width, height);
+ pDst, dstStep, width, height);
}
dptr = (UINT32*) pDst;
if (leadIn)
{
- generic->alphaComp_argb((const BYTE*) sptr1,
- src1Step, (const BYTE*) sptr2, src2Step,
- (BYTE*) dptr, dstStep, leadIn, 1);
+ pstatus_t status;
+ status = generic->alphaComp_argb((const BYTE*) sptr1,
+ src1Step, (const BYTE*) sptr2, src2Step,
+ (BYTE*) dptr, dstStep, leadIn, 1);
+ if (status != PRIMITIVES_SUCCESS)
+ return status;
+
sptr1 += leadIn;
sptr2 += leadIn;
dptr += leadIn;
/* Finish off the remainder. */
if (pixels)
{
- generic->alphaComp_argb((const BYTE*) sptr1, src1Step,
- (const BYTE*) sptr2, src2Step,
- (BYTE*) dptr, dstStep, pixels, 1);
+ pstatus_t status;
+ status = generic->alphaComp_argb((const BYTE*) sptr1, src1Step,
+ (const BYTE*) sptr2, src2Step,
+ (BYTE*) dptr, dstStep, pixels, 1);
+ if (status != PRIMITIVES_SUCCESS)
+ return status;
+
sptr1 += pixels;
sptr2 += pixels;
dptr += pixels;
sz.width = width;
sz.height = height;
return ippiAlphaComp_8u_AC4R(pSrc1, src1Step, pSrc2, src2Step,
- pDst, dstStep, sz, ippAlphaOver);
+ pDst, dstStep, sz, ippAlphaOver);
}
#endif
for (y = 0; y < roi->height; y++)
{
- int x;
+ UINT32 x;
for (x = 0; x < roi->width; ++x)
{
/* ------------------------------------------------------------------------- */
static pstatus_t general_RGBToRGB_16s8u_P3AC4R(
- const INT16* pSrc[3], /* 16-bit R,G, and B arrays */
- INT32 srcStep, /* bytes between rows in source data */
+ const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */
+ UINT32 srcStep, /* bytes between rows in source data */
BYTE* pDst, /* 32-bit interleaved ARGB (ABGR?) data */
- INT32 dstStep, /* bytes between rows in dest data */
+ UINT32 dstStep, /* bytes between rows in dest data */
UINT32 DstFormat,
const prim_size_t* roi) /* region of interest */
{
{
/* We can't maintain 16-byte alignment. */
return generic->yCbCrToRGB_16s16s_P3P3(pSrc, srcStep,
- pDst, dstStep, roi);
+ pDst, dstStep, roi);
}
zero = _mm_setzero_si128();
{
/* We can't maintain 16-byte alignment. */
return generic->RGBToYCbCr_16s16s_P3P3(pSrc, srcStep,
- pDst, dstStep, roi);
+ pDst, dstStep, roi);
}
min = _mm_set1_epi16(-128 * 32);
_mm_set1_epi32(0xFFFFFFFFU)
pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
- const INT16* pSrc[3], /* 16-bit R,G, and B arrays */
- INT32 srcStep, /* bytes between rows in source data */
+ const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */
+ UINT32 srcStep, /* bytes between rows in source data */
BYTE* pDst, /* 32-bit interleaved ARGB (ABGR?) data */
- INT32 dstStep, /* bytes between rows in dest data */
+ UINT32 dstStep, /* bytes between rows in dest data */
UINT32 DstFormat,
const prim_size_t* roi) /* region of interest */
{
|| (dstStep & 0x0f))
{
return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst,
- dstStep, DstFormat, roi);
+ dstStep, DstFormat, roi);
}
+ // TODO: Need to update SSE code to allow color conversion!!!
+ return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst,
+ dstStep, DstFormat, roi);
+
out = (BYTE*) pDst;
srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
dstbump = (dstStep - (roi->width * sizeof(UINT32)));
static pstatus_t general_set_8u(
BYTE val,
BYTE* pDst,
- INT32 len)
+ UINT32 len)
{
memset((void*) pDst, (int) val, (size_t) len);
return PRIMITIVES_SUCCESS;
static pstatus_t general_set_32s(
INT32 val,
INT32* pDst,
- INT32 len)
+ UINT32 len)
{
INT32* dptr = (INT32*) pDst;
size_t span, remaining;
static pstatus_t general_set_32u(
UINT32 val,
UINT32* pDst,
- INT32 len)
+ UINT32 len)
{
UINT32* dptr = (UINT32*) pDst;
size_t span, remaining;
static pstatus_t sse2_set_8u(
BYTE val,
BYTE* pDst,
- INT32 len)
+ UINT32 len)
{
BYTE byte, *dptr;
__m128i xmm0;
static pstatus_t sse2_set_32u(
UINT32 val,
UINT32* pDst,
- INT32 len)
+ UINT32 len)
{
const primitives_t* prim = primitives_get_generic();
UINT32* dptr = (UINT32*) pDst;
static pstatus_t sse2_set_32s(
INT32 val,
INT32* pDst,
- INT32 len)
+ UINT32 len)
{
UINT32 uval = *((UINT32*) &val);
return sse2_set_32u(uval, (UINT32*) pDst, len);
/* ------------------------------------------------------------------------- */
static pstatus_t general_lShiftC_16s(
const INT16* pSrc,
- INT32 val,
+ UINT32 val,
INT16* pDst,
- INT32 len)
+ UINT32 len)
{
if (val == 0) return PRIMITIVES_SUCCESS;
/* ------------------------------------------------------------------------- */
static pstatus_t general_rShiftC_16s(
const INT16* pSrc,
- INT32 val,
+ UINT32 val,
INT16* pDst,
- INT32 len)
+ UINT32 len)
{
if (val == 0) return PRIMITIVES_SUCCESS;
/* ------------------------------------------------------------------------- */
static pstatus_t general_lShiftC_16u(
const UINT16* pSrc,
- INT32 val,
+ UINT32 val,
UINT16* pDst,
- INT32 len)
+ UINT32 len)
{
if (val == 0) return PRIMITIVES_SUCCESS;
/* ------------------------------------------------------------------------- */
static pstatus_t general_rShiftC_16u(
const UINT16* pSrc,
- INT32 val,
+ UINT32 val,
UINT16* pDst,
- INT32 len)
+ UINT32 len)
{
if (val == 0) return PRIMITIVES_SUCCESS;
const INT16* pSrc,
INT32 val,
INT16* pDst,
- INT32 len)
+ UINT32 len)
{
if (val == 0) return PRIMITIVES_SUCCESS;
const UINT16* pSrc,
INT32 val,
UINT16* pDst,
- INT32 len)
+ UINT32 len)
{
if (val == 0) return PRIMITIVES_SUCCESS;
# if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS)
/* ------------------------------------------------------------------------- */
SSE3_SCD_ROUTINE(sse2_lShiftC_16s, INT16, generic->lShiftC_16s,
- _mm_slli_epi16, *dptr++ = *sptr++ << val)
+ _mm_slli_epi16, *dptr++ = *sptr++ << val)
/* ------------------------------------------------------------------------- */
SSE3_SCD_ROUTINE(sse2_rShiftC_16s, INT16, generic->rShiftC_16s,
- _mm_srai_epi16, *dptr++ = *sptr++ >> val)
+ _mm_srai_epi16, *dptr++ = *sptr++ >> val)
/* ------------------------------------------------------------------------- */
SSE3_SCD_ROUTINE(sse2_lShiftC_16u, UINT16, generic->lShiftC_16u,
- _mm_slli_epi16, *dptr++ = *sptr++ << val)
+ _mm_slli_epi16, *dptr++ = *sptr++ << val)
/* ------------------------------------------------------------------------- */
SSE3_SCD_ROUTINE(sse2_rShiftC_16u, UINT16, generic->rShiftC_16u,
- _mm_srli_epi16, *dptr++ = *sptr++ >> val)
+ _mm_srli_epi16, *dptr++ = *sptr++ >> val)
# endif /* !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS) */
#endif
static pstatus_t general_sign_16s(
const INT16* pSrc,
INT16* pDst,
- INT32 len)
+ UINT32 len)
{
while (len--)
{
static pstatus_t ssse3_sign_16s(
const INT16* pSrc,
INT16* pDst,
- INT32 len)
+ UINT32 len)
{
const INT16* sptr = (const INT16*) pSrc;
INT16* dptr = (INT16*) pDst;
* SCD = Source, Constant, Destination
*/
#define SSE3_SCD_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \
- static pstatus_t _name_(const _type_ *pSrc, INT32 val, _type_ *pDst, INT32 len) \
- { \
- INT32 shifts; \
- UINT32 offBeatMask; \
- const _type_ *sptr = pSrc; \
- _type_ *dptr = pDst; \
- size_t count; \
- if (len < 16) /* pointless if too small */ \
- { \
- return _fallback_(pSrc, val, pDst, len); \
- } \
- if (sizeof(_type_) == 1) shifts = 1; \
- else if (sizeof(_type_) == 2) shifts = 2; \
- else if (sizeof(_type_) == 4) shifts = 3; \
- else if (sizeof(_type_) == 8) shifts = 4; \
- offBeatMask = (1 << (shifts - 1)) - 1; \
- if ((ULONG_PTR) pDst & offBeatMask) \
- { \
- /* Incrementing the pointer skips over 16-byte boundary. */ \
- return _fallback_(pSrc, val, pDst, len); \
- } \
- /* Get to the 16-byte boundary now. */ \
- while ((ULONG_PTR) dptr & 0x0f) \
- { \
- _slowWay_; \
- if (--len == 0) return PRIMITIVES_SUCCESS; \
- } \
- /* Use 8 128-bit SSE registers. */ \
- count = len >> (8-shifts); \
- len -= count << (8-shifts); \
- if ((ULONG_PTR) sptr & 0x0f) \
- { \
- while (count--) \
- { \
- __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \
- xmm0 = _mm_lddqu_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm1 = _mm_lddqu_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm2 = _mm_lddqu_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm3 = _mm_lddqu_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm4 = _mm_lddqu_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm5 = _mm_lddqu_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm6 = _mm_lddqu_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm7 = _mm_lddqu_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm0 = _op_(xmm0, val); \
- xmm1 = _op_(xmm1, val); \
- xmm2 = _op_(xmm2, val); \
- xmm3 = _op_(xmm3, val); \
- xmm4 = _op_(xmm4, val); \
- xmm5 = _op_(xmm5, val); \
- xmm6 = _op_(xmm6, val); \
- xmm7 = _op_(xmm7, val); \
- _mm_store_si128((__m128i *) dptr, xmm0); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm1); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm2); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm3); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm4); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm5); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm6); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm7); \
- dptr += (16/sizeof(_type_)); \
- } \
- } \
- else \
- { \
- while (count--) \
- { \
- __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \
- xmm0 = _mm_load_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm1 = _mm_load_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm2 = _mm_load_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm3 = _mm_load_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm4 = _mm_load_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm5 = _mm_load_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm6 = _mm_load_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm7 = _mm_load_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm0 = _op_(xmm0, val); \
- xmm1 = _op_(xmm1, val); \
- xmm2 = _op_(xmm2, val); \
- xmm3 = _op_(xmm3, val); \
- xmm4 = _op_(xmm4, val); \
- xmm5 = _op_(xmm5, val); \
- xmm6 = _op_(xmm6, val); \
- xmm7 = _op_(xmm7, val); \
- _mm_store_si128((__m128i *) dptr, xmm0); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm1); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm2); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm3); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm4); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm5); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm6); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm7); \
- dptr += (16/sizeof(_type_)); \
- } \
- } \
- /* Use a single 128-bit SSE register. */ \
- count = len >> (5-shifts); \
- len -= count << (5-shifts); \
- while (count--) \
- { \
- __m128i xmm0 = LOAD_SI128(sptr); sptr += (16/sizeof(_type_)); \
- xmm0 = _op_(xmm0, val); \
- _mm_store_si128((__m128i *) dptr, xmm0); \
- dptr += (16/sizeof(_type_)); \
- } \
- /* Finish off the remainder. */ \
- while (len--) { _slowWay_; } \
- return PRIMITIVES_SUCCESS; \
+ static pstatus_t _name_(const _type_ *pSrc, UINT32 val, _type_ *pDst, UINT32 len) \
+ { \
+ INT32 shifts; \
+ UINT32 offBeatMask; \
+ const _type_ *sptr = pSrc; \
+ _type_ *dptr = pDst; \
+ size_t count; \
+ if (len < 16) /* pointless if too small */ \
+ { \
+ return _fallback_(pSrc, val, pDst, len); \
+ } \
+ if (sizeof(_type_) == 1) shifts = 1; \
+ else if (sizeof(_type_) == 2) shifts = 2; \
+ else if (sizeof(_type_) == 4) shifts = 3; \
+ else if (sizeof(_type_) == 8) shifts = 4; \
+ offBeatMask = (1 << (shifts - 1)) - 1; \
+ if ((ULONG_PTR) pDst & offBeatMask) \
+ { \
+ /* Incrementing the pointer skips over 16-byte boundary. */ \
+ return _fallback_(pSrc, val, pDst, len); \
+ } \
+ /* Get to the 16-byte boundary now. */ \
+ while ((ULONG_PTR) dptr & 0x0f) \
+ { \
+ _slowWay_; \
+ if (--len == 0) return PRIMITIVES_SUCCESS; \
+ } \
+ /* Use 8 128-bit SSE registers. */ \
+ count = len >> (8-shifts); \
+ len -= count << (8-shifts); \
+ if ((ULONG_PTR) sptr & 0x0f) \
+ { \
+ while (count--) \
+ { \
+ __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \
+ xmm0 = _mm_lddqu_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm1 = _mm_lddqu_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm2 = _mm_lddqu_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm3 = _mm_lddqu_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm4 = _mm_lddqu_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm5 = _mm_lddqu_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm6 = _mm_lddqu_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm7 = _mm_lddqu_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm0 = _op_(xmm0, val); \
+ xmm1 = _op_(xmm1, val); \
+ xmm2 = _op_(xmm2, val); \
+ xmm3 = _op_(xmm3, val); \
+ xmm4 = _op_(xmm4, val); \
+ xmm5 = _op_(xmm5, val); \
+ xmm6 = _op_(xmm6, val); \
+ xmm7 = _op_(xmm7, val); \
+ _mm_store_si128((__m128i *) dptr, xmm0); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm1); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm2); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm3); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm4); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm5); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm6); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm7); \
+ dptr += (16/sizeof(_type_)); \
+ } \
+ } \
+ else \
+ { \
+ while (count--) \
+ { \
+ __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \
+ xmm0 = _mm_load_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm1 = _mm_load_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm2 = _mm_load_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm3 = _mm_load_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm4 = _mm_load_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm5 = _mm_load_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm6 = _mm_load_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm7 = _mm_load_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm0 = _op_(xmm0, val); \
+ xmm1 = _op_(xmm1, val); \
+ xmm2 = _op_(xmm2, val); \
+ xmm3 = _op_(xmm3, val); \
+ xmm4 = _op_(xmm4, val); \
+ xmm5 = _op_(xmm5, val); \
+ xmm6 = _op_(xmm6, val); \
+ xmm7 = _op_(xmm7, val); \
+ _mm_store_si128((__m128i *) dptr, xmm0); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm1); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm2); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm3); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm4); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm5); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm6); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm7); \
+ dptr += (16/sizeof(_type_)); \
+ } \
+ } \
+ /* Use a single 128-bit SSE register. */ \
+ count = len >> (5-shifts); \
+ len -= count << (5-shifts); \
+ while (count--) \
+ { \
+ __m128i xmm0 = LOAD_SI128(sptr); sptr += (16/sizeof(_type_)); \
+ xmm0 = _op_(xmm0, val); \
+ _mm_store_si128((__m128i *) dptr, xmm0); \
+ dptr += (16/sizeof(_type_)); \
+ } \
+ /* Finish off the remainder. */ \
+ while (len--) { _slowWay_; } \
+ return PRIMITIVES_SUCCESS; \
}
/* ----------------------------------------------------------------------------
*/
#define SSE3_SCD_PRE_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \
pstatus_t _name_(const _type_ *pSrc, _type_ val, _type_ *pDst, INT32 len) \
- { \
- int shifts; \
- UINT32 offBeatMask; \
- const _type_ *sptr = pSrc; \
- _type_ *dptr = pDst; \
- size_t count; \
- __m128i xmm0; \
- if (len < 16) /* pointless if too small */ \
- { \
- return _fallback_(pSrc, val, pDst, len); \
- } \
- if (sizeof(_type_) == 1) shifts = 1; \
- else if (sizeof(_type_) == 2) shifts = 2; \
- else if (sizeof(_type_) == 4) shifts = 3; \
- else if (sizeof(_type_) == 8) shifts = 4; \
- offBeatMask = (1 << (shifts - 1)) - 1; \
- if ((ULONG_PTR) pDst & offBeatMask) \
- { \
- /* Incrementing the pointer skips over 16-byte boundary. */ \
- return _fallback_(pSrc, val, pDst, len); \
- } \
- /* Get to the 16-byte boundary now. */ \
- while ((ULONG_PTR) dptr & 0x0f) \
- { \
- _slowWay_; \
- if (--len == 0) return PRIMITIVES_SUCCESS; \
- } \
- /* Use 4 128-bit SSE registers. */ \
- count = len >> (7-shifts); \
- len -= count << (7-shifts); \
- xmm0 = _mm_set1_epi32(val); \
- if ((ULONG_PTR) sptr & 0x0f) \
- { \
- while (count--) \
- { \
- __m128i xmm1, xmm2, xmm3, xmm4; \
- xmm1 = _mm_lddqu_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm2 = _mm_lddqu_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm3 = _mm_lddqu_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm4 = _mm_lddqu_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm1 = _op_(xmm1, xmm0); \
- xmm2 = _op_(xmm2, xmm0); \
- xmm3 = _op_(xmm3, xmm0); \
- xmm4 = _op_(xmm4, xmm0); \
- _mm_store_si128((__m128i *) dptr, xmm1); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm2); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm3); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm4); \
- dptr += (16/sizeof(_type_)); \
- } \
- } \
- else \
- { \
- while (count--) \
- { \
- __m128i xmm1, xmm2, xmm3, xmm4; \
- xmm1 = _mm_load_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm2 = _mm_load_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm3 = _mm_load_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm4 = _mm_load_si128((__m128i *) sptr); \
- sptr += (16/sizeof(_type_)); \
- xmm1 = _op_(xmm1, xmm0); \
- xmm2 = _op_(xmm2, xmm0); \
- xmm3 = _op_(xmm3, xmm0); \
- xmm4 = _op_(xmm4, xmm0); \
- _mm_store_si128((__m128i *) dptr, xmm1); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm2); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm3); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm4); \
- dptr += (16/sizeof(_type_)); \
- } \
- } \
- /* Use a single 128-bit SSE register. */ \
- count = len >> (5-shifts); \
- len -= count << (5-shifts); \
- while (count--) \
- { \
- __m128i xmm1 = LOAD_SI128(sptr); sptr += (16/sizeof(_type_)); \
- xmm1 = _op_(xmm1, xmm0); \
- _mm_store_si128((__m128i *) dptr, xmm1); \
- dptr += (16/sizeof(_type_)); \
- } \
- /* Finish off the remainder. */ \
- while (len--) { _slowWay_; } \
- return PRIMITIVES_SUCCESS; \
+ { \
+ int shifts; \
+ UINT32 offBeatMask; \
+ const _type_ *sptr = pSrc; \
+ _type_ *dptr = pDst; \
+ size_t count; \
+ __m128i xmm0; \
+ if (len < 16) /* pointless if too small */ \
+ { \
+ return _fallback_(pSrc, val, pDst, len); \
+ } \
+ if (sizeof(_type_) == 1) shifts = 1; \
+ else if (sizeof(_type_) == 2) shifts = 2; \
+ else if (sizeof(_type_) == 4) shifts = 3; \
+ else if (sizeof(_type_) == 8) shifts = 4; \
+ offBeatMask = (1 << (shifts - 1)) - 1; \
+ if ((ULONG_PTR) pDst & offBeatMask) \
+ { \
+ /* Incrementing the pointer skips over 16-byte boundary. */ \
+ return _fallback_(pSrc, val, pDst, len); \
+ } \
+ /* Get to the 16-byte boundary now. */ \
+ while ((ULONG_PTR) dptr & 0x0f) \
+ { \
+ _slowWay_; \
+ if (--len == 0) return PRIMITIVES_SUCCESS; \
+ } \
+ /* Use 4 128-bit SSE registers. */ \
+ count = len >> (7-shifts); \
+ len -= count << (7-shifts); \
+ xmm0 = _mm_set1_epi32(val); \
+ if ((ULONG_PTR) sptr & 0x0f) \
+ { \
+ while (count--) \
+ { \
+ __m128i xmm1, xmm2, xmm3, xmm4; \
+ xmm1 = _mm_lddqu_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm2 = _mm_lddqu_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm3 = _mm_lddqu_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm4 = _mm_lddqu_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm1 = _op_(xmm1, xmm0); \
+ xmm2 = _op_(xmm2, xmm0); \
+ xmm3 = _op_(xmm3, xmm0); \
+ xmm4 = _op_(xmm4, xmm0); \
+ _mm_store_si128((__m128i *) dptr, xmm1); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm2); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm3); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm4); \
+ dptr += (16/sizeof(_type_)); \
+ } \
+ } \
+ else \
+ { \
+ while (count--) \
+ { \
+ __m128i xmm1, xmm2, xmm3, xmm4; \
+ xmm1 = _mm_load_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm2 = _mm_load_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm3 = _mm_load_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm4 = _mm_load_si128((__m128i *) sptr); \
+ sptr += (16/sizeof(_type_)); \
+ xmm1 = _op_(xmm1, xmm0); \
+ xmm2 = _op_(xmm2, xmm0); \
+ xmm3 = _op_(xmm3, xmm0); \
+ xmm4 = _op_(xmm4, xmm0); \
+ _mm_store_si128((__m128i *) dptr, xmm1); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm2); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm3); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm4); \
+ dptr += (16/sizeof(_type_)); \
+ } \
+ } \
+ /* Use a single 128-bit SSE register. */ \
+ count = len >> (5-shifts); \
+ len -= count << (5-shifts); \
+ while (count--) \
+ { \
+ __m128i xmm1 = LOAD_SI128(sptr); sptr += (16/sizeof(_type_)); \
+ xmm1 = _op_(xmm1, xmm0); \
+ _mm_store_si128((__m128i *) dptr, xmm1); \
+ dptr += (16/sizeof(_type_)); \
+ } \
+ /* Finish off the remainder. */ \
+ while (len--) { _slowWay_; } \
+ return PRIMITIVES_SUCCESS; \
}
/* ----------------------------------------------------------------------------
* SSD = Source1, Source2, Destination
*/
#define SSE3_SSD_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \
- pstatus_t _name_(const _type_ *pSrc1, const _type_ *pSrc2, _type_ *pDst, INT32 len) \
- { \
- int shifts; \
- UINT32 offBeatMask; \
- const _type_ *sptr1 = pSrc1; \
- const _type_ *sptr2 = pSrc2; \
- _type_ *dptr = pDst; \
- size_t count; \
- if (len < 16) /* pointless if too small */ \
- { \
- return _fallback_(pSrc1, pSrc2, pDst, len); \
- } \
- if (sizeof(_type_) == 1) shifts = 1; \
- else if (sizeof(_type_) == 2) shifts = 2; \
- else if (sizeof(_type_) == 4) shifts = 3; \
- else if (sizeof(_type_) == 8) shifts = 4; \
- offBeatMask = (1 << (shifts - 1)) - 1; \
- if ((ULONG_PTR) pDst & offBeatMask) \
- { \
- /* Incrementing the pointer skips over 16-byte boundary. */ \
- return _fallback_(pSrc1, pSrc2, pDst, len); \
- } \
- /* Get to the 16-byte boundary now. */ \
- while ((ULONG_PTR) dptr & 0x0f) \
- { \
- _slowWay_; \
- if (--len == 0) return PRIMITIVES_SUCCESS; \
- } \
- /* Use 4 128-bit SSE registers. */ \
- count = len >> (7-shifts); \
- len -= count << (7-shifts); \
- if (((ULONG_PTR) sptr1 & 0x0f) || ((ULONG_PTR) sptr2 & 0x0f)) \
- { \
- /* Unaligned loads */ \
- while (count--) \
- { \
- __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \
- xmm0 = _mm_lddqu_si128((__m128i *) sptr1); \
- sptr1 += (16/sizeof(_type_)); \
- xmm1 = _mm_lddqu_si128((__m128i *) sptr1); \
- sptr1 += (16/sizeof(_type_)); \
- xmm2 = _mm_lddqu_si128((__m128i *) sptr1); \
- sptr1 += (16/sizeof(_type_)); \
- xmm3 = _mm_lddqu_si128((__m128i *) sptr1); \
- sptr1 += (16/sizeof(_type_)); \
- xmm4 = _mm_lddqu_si128((__m128i *) sptr2); \
- sptr2 += (16/sizeof(_type_)); \
- xmm5 = _mm_lddqu_si128((__m128i *) sptr2); \
- sptr2 += (16/sizeof(_type_)); \
- xmm6 = _mm_lddqu_si128((__m128i *) sptr2); \
- sptr2 += (16/sizeof(_type_)); \
- xmm7 = _mm_lddqu_si128((__m128i *) sptr2); \
- sptr2 += (16/sizeof(_type_)); \
- xmm0 = _op_(xmm0, xmm4); \
- xmm1 = _op_(xmm1, xmm5); \
- xmm2 = _op_(xmm2, xmm6); \
- xmm3 = _op_(xmm3, xmm7); \
- _mm_store_si128((__m128i *) dptr, xmm0); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm1); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm2); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm3); \
- dptr += (16/sizeof(_type_)); \
- } \
- } \
- else \
- { \
- /* Aligned loads */ \
- while (count--) \
- { \
- __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \
- xmm0 = _mm_load_si128((__m128i *) sptr1); \
- sptr1 += (16/sizeof(_type_)); \
- xmm1 = _mm_load_si128((__m128i *) sptr1); \
- sptr1 += (16/sizeof(_type_)); \
- xmm2 = _mm_load_si128((__m128i *) sptr1); \
- sptr1 += (16/sizeof(_type_)); \
- xmm3 = _mm_load_si128((__m128i *) sptr1); \
- sptr1 += (16/sizeof(_type_)); \
- xmm4 = _mm_load_si128((__m128i *) sptr2); \
- sptr2 += (16/sizeof(_type_)); \
- xmm5 = _mm_load_si128((__m128i *) sptr2); \
- sptr2 += (16/sizeof(_type_)); \
- xmm6 = _mm_load_si128((__m128i *) sptr2); \
- sptr2 += (16/sizeof(_type_)); \
- xmm7 = _mm_load_si128((__m128i *) sptr2); \
- sptr2 += (16/sizeof(_type_)); \
- xmm0 = _op_(xmm0, xmm4); \
- xmm1 = _op_(xmm1, xmm5); \
- xmm2 = _op_(xmm2, xmm6); \
- xmm3 = _op_(xmm3, xmm7); \
- _mm_store_si128((__m128i *) dptr, xmm0); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm1); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm2); \
- dptr += (16/sizeof(_type_)); \
- _mm_store_si128((__m128i *) dptr, xmm3); \
- dptr += (16/sizeof(_type_)); \
- } \
- } \
- /* Use a single 128-bit SSE register. */ \
- count = len >> (5-shifts); \
- len -= count << (5-shifts); \
- while (count--) \
- { \
- __m128i xmm0, xmm1; \
- xmm0 = LOAD_SI128(sptr1); sptr1 += (16/sizeof(_type_)); \
- xmm1 = LOAD_SI128(sptr2); sptr2 += (16/sizeof(_type_)); \
- xmm0 = _op_(xmm0, xmm1); \
- _mm_store_si128((__m128i *) dptr, xmm0); \
- dptr += (16/sizeof(_type_)); \
- } \
- /* Finish off the remainder. */ \
- while (len--) { _slowWay_; } \
- return PRIMITIVES_SUCCESS; \
+ pstatus_t _name_(const _type_ *pSrc1, const _type_ *pSrc2, _type_ *pDst, UINT32 len) \
+ { \
+ int shifts; \
+ UINT32 offBeatMask; \
+ const _type_ *sptr1 = pSrc1; \
+ const _type_ *sptr2 = pSrc2; \
+ _type_ *dptr = pDst; \
+ size_t count; \
+ if (len < 16) /* pointless if too small */ \
+ { \
+ return _fallback_(pSrc1, pSrc2, pDst, len); \
+ } \
+ if (sizeof(_type_) == 1) shifts = 1; \
+ else if (sizeof(_type_) == 2) shifts = 2; \
+ else if (sizeof(_type_) == 4) shifts = 3; \
+ else if (sizeof(_type_) == 8) shifts = 4; \
+ offBeatMask = (1 << (shifts - 1)) - 1; \
+ if ((ULONG_PTR) pDst & offBeatMask) \
+ { \
+ /* Incrementing the pointer skips over 16-byte boundary. */ \
+ return _fallback_(pSrc1, pSrc2, pDst, len); \
+ } \
+ /* Get to the 16-byte boundary now. */ \
+ while ((ULONG_PTR) dptr & 0x0f) \
+ { \
+ pstatus_t status; \
+ status = _slowWay_; \
+ if (status != PRIMITIVES_SUCCESS) return status; \
+ if (--len == 0) return PRIMITIVES_SUCCESS; \
+ } \
+ /* Use 4 128-bit SSE registers. */ \
+ count = len >> (7-shifts); \
+ len -= count << (7-shifts); \
+ if (((ULONG_PTR) sptr1 & 0x0f) || ((ULONG_PTR) sptr2 & 0x0f)) \
+ { \
+ /* Unaligned loads */ \
+ while (count--) \
+ { \
+ __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \
+ xmm0 = _mm_lddqu_si128((__m128i *) sptr1); \
+ sptr1 += (16/sizeof(_type_)); \
+ xmm1 = _mm_lddqu_si128((__m128i *) sptr1); \
+ sptr1 += (16/sizeof(_type_)); \
+ xmm2 = _mm_lddqu_si128((__m128i *) sptr1); \
+ sptr1 += (16/sizeof(_type_)); \
+ xmm3 = _mm_lddqu_si128((__m128i *) sptr1); \
+ sptr1 += (16/sizeof(_type_)); \
+ xmm4 = _mm_lddqu_si128((__m128i *) sptr2); \
+ sptr2 += (16/sizeof(_type_)); \
+ xmm5 = _mm_lddqu_si128((__m128i *) sptr2); \
+ sptr2 += (16/sizeof(_type_)); \
+ xmm6 = _mm_lddqu_si128((__m128i *) sptr2); \
+ sptr2 += (16/sizeof(_type_)); \
+ xmm7 = _mm_lddqu_si128((__m128i *) sptr2); \
+ sptr2 += (16/sizeof(_type_)); \
+ xmm0 = _op_(xmm0, xmm4); \
+ xmm1 = _op_(xmm1, xmm5); \
+ xmm2 = _op_(xmm2, xmm6); \
+ xmm3 = _op_(xmm3, xmm7); \
+ _mm_store_si128((__m128i *) dptr, xmm0); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm1); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm2); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm3); \
+ dptr += (16/sizeof(_type_)); \
+ } \
+ } \
+ else \
+ { \
+ /* Aligned loads */ \
+ while (count--) \
+ { \
+ __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \
+ xmm0 = _mm_load_si128((__m128i *) sptr1); \
+ sptr1 += (16/sizeof(_type_)); \
+ xmm1 = _mm_load_si128((__m128i *) sptr1); \
+ sptr1 += (16/sizeof(_type_)); \
+ xmm2 = _mm_load_si128((__m128i *) sptr1); \
+ sptr1 += (16/sizeof(_type_)); \
+ xmm3 = _mm_load_si128((__m128i *) sptr1); \
+ sptr1 += (16/sizeof(_type_)); \
+ xmm4 = _mm_load_si128((__m128i *) sptr2); \
+ sptr2 += (16/sizeof(_type_)); \
+ xmm5 = _mm_load_si128((__m128i *) sptr2); \
+ sptr2 += (16/sizeof(_type_)); \
+ xmm6 = _mm_load_si128((__m128i *) sptr2); \
+ sptr2 += (16/sizeof(_type_)); \
+ xmm7 = _mm_load_si128((__m128i *) sptr2); \
+ sptr2 += (16/sizeof(_type_)); \
+ xmm0 = _op_(xmm0, xmm4); \
+ xmm1 = _op_(xmm1, xmm5); \
+ xmm2 = _op_(xmm2, xmm6); \
+ xmm3 = _op_(xmm3, xmm7); \
+ _mm_store_si128((__m128i *) dptr, xmm0); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm1); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm2); \
+ dptr += (16/sizeof(_type_)); \
+ _mm_store_si128((__m128i *) dptr, xmm3); \
+ dptr += (16/sizeof(_type_)); \
+ } \
+ } \
+ /* Use a single 128-bit SSE register. */ \
+ count = len >> (5-shifts); \
+ len -= count << (5-shifts); \
+ while (count--) \
+ { \
+ __m128i xmm0, xmm1; \
+ xmm0 = LOAD_SI128(sptr1); sptr1 += (16/sizeof(_type_)); \
+ xmm1 = LOAD_SI128(sptr2); sptr2 += (16/sizeof(_type_)); \
+ xmm0 = _op_(xmm0, xmm1); \
+ _mm_store_si128((__m128i *) dptr, xmm0); \
+ dptr += (16/sizeof(_type_)); \
+ } \
+ /* Finish off the remainder. */ \
+ while (len--) { _slowWay_; } \
+ return PRIMITIVES_SUCCESS; \
}
#endif /* !__PRIM_TEMPLATES_H_INCLUDED__ */
if (!pPrimitivesGenericInitialized)
primitives_init_generic();
- return &pPrimitives;
+ return &pPrimitivesGeneric;
}
pstatus_t status;
INT16 ALIGN(src1[FUNC_TEST_SIZE + 3]), ALIGN(src2[FUNC_TEST_SIZE + 3]),
- ALIGN(d1[FUNC_TEST_SIZE + 3]), ALIGN(d2[FUNC_TEST_SIZE + 3]);
+ ALIGN(d1[FUNC_TEST_SIZE + 3]), ALIGN(d2[FUNC_TEST_SIZE + 3]);
char testStr[256];
testStr[0] = '\0';
static BOOL test_add16s_speed(void)
{
BYTE ALIGN(src1[MAX_TEST_SIZE + 3]), ALIGN(src2[MAX_TEST_SIZE + 3]),
- ALIGN(dst[MAX_TEST_SIZE + 3]);
+ ALIGN(dst[MAX_TEST_SIZE + 3]);
if (!g_TestPrimitivesPerformance)
return TRUE;
winpr_RAND(src2, sizeof(src2));
if (!speed_test("add16s", "aligned", g_Iterations,
- generic->add_16s, optimized->add_16s,
+ (speed_test_fkt)generic->add_16s,
+ (speed_test_fkt)optimized->add_16s,
src1, src2, dst, FUNC_TEST_SIZE))
return FALSE;
if (!test_add16s_func())
return -1;
- if (!test_add16s_speed())
- return -1;
+ if (g_TestPrimitivesPerformance)
+ {
+ if (!test_add16s_speed())
+ return -1;
+ }
return 0;
}
#define GRN(_c_) (((_c_) & 0x0000FF00U) >> 8)
#define BLU(_c_) ((_c_) & 0x000000FFU)
#define TOLERANCE 1
-#define PIXEL(_addr_, _bytes_, _x_, _y_) \
- ((UINT32 *) (((BYTE *) (_addr_)) + (_x_)*4 + (_y_)*(_bytes_)))
+static inline const UINT32* PIXEL(const BYTE* addr, UINT32 stride, UINT32 x, UINT32 y)
+{
+ const BYTE* pixel = addr + x * sizeof(UINT32) + y * stride;
+
+ return (const UINT32*)pixel;
+}
+
#define SRC1_WIDTH 6
#define SRC1_HEIGHT 6
#define SRC2_WIDTH 7
/* ------------------------------------------------------------------------- */
static UINT32 alpha_add(
- UINT32 c1,
- UINT32 c2)
+ UINT32 c1,
+ UINT32 c2)
{
UINT32 a1 = ALF(c1);
UINT32 r1 = RED(c1);
/* ------------------------------------------------------------------------- */
static UINT32 colordist(
- UINT32 c1,
- UINT32 c2)
+ UINT32 c1,
+ UINT32 c2)
{
int d, maxd = 0;
d = ABS(ALF(c1) - ALF(c2));
}
/* ------------------------------------------------------------------------- */
-static BOOL check(const BYTE* pSrc1, INT32 src1Step,
- const BYTE* pSrc2, INT32 src2Step,
- BYTE* pDst, INT32 dstStep,
- INT32 width, INT32 height)
+static BOOL check(const BYTE* pSrc1, UINT32 src1Step,
+ const BYTE* pSrc2, UINT32 src2Step,
+ BYTE* pDst, UINT32 dstStep,
+ UINT32 width, UINT32 height)
{
UINT32 x, y;
for (y = 0; y < height; ++y)
static BOOL test_alphaComp_func(void)
{
pstatus_t status;
- BYTE ALIGN(src1[SRC1_WIDTH * SRC1_HEIGHT]);
- BYTE ALIGN(src2[SRC2_WIDTH * SRC2_HEIGHT]);
- BYTE ALIGN(dst1[DST_WIDTH * DST_HEIGHT]);
- char testStr[256];
+ BYTE ALIGN(src1[SRC1_WIDTH * SRC1_HEIGHT * 4]);
+ BYTE ALIGN(src2[SRC2_WIDTH * SRC2_HEIGHT * 4]);
+ BYTE ALIGN(dst1[DST_WIDTH * DST_HEIGHT * 4]);
UINT32* ptr;
UINT32 i;
- testStr[0] = '\0';
+
winpr_RAND((BYTE*)src1, sizeof(src1));
+
/* Special-case the first two values */
src1[0] &= 0x00FFFFFFU;
src1[1] |= 0xFF000000U;
memset(dst1, 0, sizeof(dst1));
status = generic->alphaComp_argb(src1, 4 * SRC1_WIDTH,
- src2, 4 * SRC2_WIDTH,
- dst1, 4 * DST_WIDTH, TEST_WIDTH, TEST_HEIGHT);
+ src2, 4 * SRC2_WIDTH,
+ dst1, 4 * DST_WIDTH, TEST_WIDTH, TEST_HEIGHT);
if (status != PRIMITIVES_SUCCESS)
return FALSE;
return FALSE;
status = optimized->alphaComp_argb((const BYTE*) src1, 4 * SRC1_WIDTH,
- (const BYTE*) src2, 4 * SRC2_WIDTH,
- (BYTE*) dst1, 4 * DST_WIDTH, TEST_WIDTH, TEST_HEIGHT);
+ (const BYTE*) src2, 4 * SRC2_WIDTH,
+ (BYTE*) dst1, 4 * DST_WIDTH, TEST_WIDTH, TEST_HEIGHT);
if (status != PRIMITIVES_SUCCESS)
return FALSE;
memset(dst1, 0, sizeof(dst1));
if (!speed_test("add16s", "aligned", g_Iterations,
- generic->alphaComp_argb, optimized->alphaComp_argb,
+ (speed_test_fkt)generic->alphaComp_argb,
+ (speed_test_fkt)optimized->alphaComp_argb,
src1, 4 * SRC1_WIDTH,
src2, 4 * SRC2_WIDTH,
dst1, 4 * DST_WIDTH, TEST_WIDTH, TEST_HEIGHT))
if (!test_alphaComp_func())
return -1;
- if (!test_alphaComp_speed())
- return -1;
+ if (g_TestPrimitivesPerformance)
+ {
+ if (!test_alphaComp_speed())
+ return -1;
+ }
return 0;
}
#include "prim_test.h"
#define FUNC_TEST_SIZE 65536
-static const int ANDOR_PRETEST_ITERATIONS = 100000;
-static const int TEST_TIME = 2.0; // seconds
#define VALUE (0xA5A5A5A5U)
/* ========================================================================= */
-static BOOL test_and_32u_func(void)
+static BOOL test_and_32u_impl(const char* name, __andC_32u_t fkt,
+ const UINT32* src, const UINT32 val,
+ UINT32* dst, size_t size)
{
- UINT32 ALIGN(src[FUNC_TEST_SIZE + 3]), ALIGN(dst[FUNC_TEST_SIZE + 3]);
- int failed = 0;
- int i;
- char testStr[256];
- testStr[0] = '\0';
- winpr_RAND(src, sizeof(src));
- generic->andC_32u(src + 1, VALUE, dst + 1, FUNC_TEST_SIZE);
- strcat(testStr, " general");
+ size_t i;
+ pstatus_t status = fkt(src, val, dst, size);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
- for (i = 1; i <= FUNC_TEST_SIZE; ++i)
+ for (i = 0; i < size; ++i)
{
- if (dst[i] != (src[i] & VALUE))
+ if (dst[i] != (src[i] & val))
{
- printf("AND-general FAIL[%d] 0x%08x&0x%08x=0x%08x, got 0x%08x\n",
- i, src[i], VALUE, src[i] & VALUE, dst[i]);
- ++failed;
+ printf("AND %s FAIL[%d] 0x%08x&0x%08x=0x%08x, got 0x%08x\n",
+ name, (int)i, src[i], val, src[i] & val, dst[i]);
+ return FALSE;
}
}
-#ifdef WITH_SSE2
-
- if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
- {
- strcat(testStr, " SSE3");
- /* Aligned */
- memset(dst, 0, sizeof(dst));
- sse3_andC_32u(src + 1, VALUE, dst + 1, FUNC_TEST_SIZE);
-
- for (i = 1; i <= FUNC_TEST_SIZE; ++i)
- {
- if (dst[i] != (src[i] & VALUE))
- {
- printf("AND-SSE-aligned FAIL[%d] 0x%08x&0x%08x=0x%08x, got 0x%08x\n",
- i, src[i], VALUE, src[i] & VALUE, dst[i]);
- ++failed;
- }
- }
-
- /* Unaligned */
- memset(dst, 0, sizeof(dst));
- sse3_andC_32u(src + 1, VALUE, dst + 2, FUNC_TEST_SIZE);
+ return TRUE;
+}
- for (i = 1; i <= FUNC_TEST_SIZE; ++i)
- {
- if (dst[i + 1] != (src[i] & VALUE))
- {
- printf("AND-SSE-unaligned FAIL[%d] 0x%08x&0x%08x=0x%08x, got 0x%08x\n",
- i, src[i], VALUE, src[i] & VALUE, dst[i + 1]);
- ++failed;
- }
- }
- }
+static BOOL test_and_32u_func(void)
+{
+ UINT32 ALIGN(src[FUNC_TEST_SIZE + 3]), ALIGN(dst[FUNC_TEST_SIZE + 3]);
-#endif /* i386 */
+ winpr_RAND((BYTE*)src, sizeof(src));
- if (!failed) printf("All and_32u tests passed (%s).\n", testStr);
+ if (!test_and_32u_impl("generic->andC_32u aligned", generic->andC_32u,
+ src + 1, VALUE, dst + 1, FUNC_TEST_SIZE))
+ return FALSE;
+ if (!test_and_32u_impl("generic->andC_32u unaligned", generic->andC_32u,
+ src + 1, VALUE, dst + 2, FUNC_TEST_SIZE))
+ return FALSE;
+ if (!test_and_32u_impl("optimized->andC_32u aligned", optimized->andC_32u,
+ src + 1, VALUE, dst + 1, FUNC_TEST_SIZE))
+ return FALSE;
+ if (!test_and_32u_impl("optimized->andC_32u unaligned", optimized->andC_32u,
+ src + 1, VALUE, dst + 2, FUNC_TEST_SIZE))
+ return FALSE;
- return (failed > 0) ? FAILURE : SUCCESS;
+ return TRUE;
}
/* ------------------------------------------------------------------------- */
static BOOL test_and_32u_speed(void)
{
UINT32 ALIGN(src[MAX_TEST_SIZE + 3]), ALIGN(dst[MAX_TEST_SIZE + 3]);
- winpr_RAND(src, sizeof(src));
- andC_32u_speed_test("and32u", "aligned", src, NULL, VALUE, dst,
- test_sizes, NUM_TEST_SIZES, ANDOR_PRETEST_ITERATIONS, TEST_TIME);
- andC_32u_speed_test("and32u", "unaligned", src + 1, NULL, VALUE, dst,
- test_sizes, NUM_TEST_SIZES, ANDOR_PRETEST_ITERATIONS, TEST_TIME);
- return SUCCESS;
+
+ winpr_RAND((BYTE*)src, sizeof(src));
+
+ if (!speed_test("andC_32u", "aligned", g_Iterations,
+ (speed_test_fkt)generic->andC_32u,
+ (speed_test_fkt)optimized->andC_32u,
+ src + 1, VALUE, dst + 1, MAX_TEST_SIZE))
+ return FALSE;
+ if (!speed_test("andC_32u", "unaligned", g_Iterations,
+ (speed_test_fkt)generic->andC_32u,
+ (speed_test_fkt)optimized->andC_32u,
+ src + 1, VALUE, dst + 2, MAX_TEST_SIZE))
+ return FALSE;
+
+ return TRUE;
}
/* ========================================================================= */
static BOOL check(const UINT32* src, const UINT32* dst, UINT32 size, UINT32 value)
{
UINT32 i;
- UINT32 failed = 0;
- for (i = 1; i <= size; ++i)
+ for (i = 0; i < size; ++i)
{
if (dst[i] != (src[i] | value))
{
printf("OR-general general FAIL[%d] 0x%08x&0x%08x=0x%08x, got 0x%08x\n",
i, src[i], value, src[i] | value, dst[i]);
- ++failed;
+ return FALSE;
}
}
{
pstatus_t status;
UINT32 ALIGN(src[FUNC_TEST_SIZE + 3]), ALIGN(dst[FUNC_TEST_SIZE + 3]);
- char testStr[256];
- testStr[0] = '\0';
+
winpr_RAND((BYTE*)src, sizeof(src));
status = generic->orC_32u(src + 1, VALUE, dst + 1, FUNC_TEST_SIZE);
winpr_RAND((BYTE*)src, sizeof(src));
if (!speed_test("add16s", "aligned", g_Iterations,
- generic->orC_32u, optimized->orC_32u,
+ (speed_test_fkt)generic->orC_32u,
+ (speed_test_fkt)optimized->orC_32u,
src + 1, VALUE, dst + 1, FUNC_TEST_SIZE))
return FALSE;
if (!test_and_32u_func())
return -1;
- if (!test_and_32u_speed())
- return -1;
-
if (!test_or_32u_func())
return -1;
- if (!test_or_32u_speed())
- return -1;
+ if (g_TestPrimitivesPerformance)
+ {
+ if (!test_and_32u_speed())
+ return -1;
+ if (!test_or_32u_speed())
+ return -1;
+ }
return 0;
}
static const float TEST_TIME = 4.0;
/* ------------------------------------------------------------------------- */
-int test_RGBToRGB_16s8u_P3AC4R_func(void)
+static BOOL test_RGBToRGB_16s8u_P3AC4R_func(void)
{
INT16 ALIGN(r[4096]), ALIGN(g[4096]), ALIGN(b[4096]);
UINT32 ALIGN(out1[4096]);
-#ifdef WITH_SSE2
UINT32 ALIGN(out2[4096]);
-#endif
int i;
- int failed = 0;
- char testStr[256];
+ BOOL failed = FALSE;
INT16* ptrs[3];
prim_size_t roi = { 64, 64 };
- testStr[0] = '\0';
+
winpr_RAND((BYTE*)r, sizeof(r));
winpr_RAND((BYTE*)g, sizeof(g));
winpr_RAND((BYTE*)b, sizeof(b));
ptrs[0] = r;
ptrs[1] = g;
ptrs[2] = b;
- generic->RGBToRGB_16s8u_P3AC4R((const INT16**) ptrs, 64 * 2,
- (BYTE*) out1, 64 * 4, &roi);
-#ifdef WITH_SSE2
+ if (generic->RGBToRGB_16s8u_P3AC4R((const INT16**) ptrs, 64 * 2,
+ (BYTE*) out1, 64 * 4, PIXEL_FORMAT_RGBA32,
+ &roi) != PRIMITIVES_SUCCESS)
+ return FALSE;
- if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE))
- {
- strcat(testStr, " SSE2");
- sse2_RGBToRGB_16s8u_P3AC4R((const INT16**) ptrs, 64 * 2,
- (BYTE*) out2, 64 * 4, &roi);
+ if (optimized->RGBToRGB_16s8u_P3AC4R((const INT16**) ptrs, 64 * 2,
+ (BYTE*) out2, 64 * 4, PIXEL_FORMAT_RGBA32,
+ &roi) != PRIMITIVES_SUCCESS)
+ return FALSE;
- for (i = 0; i < 4096; ++i)
+ for (i = 0; i < 4096; ++i)
+ {
+ if (out1[i] != out2[i])
{
- if (out1[i] != out2[i])
- {
- printf("RGBToRGB-SSE FAIL: out1[%d]=0x%08x out2[%d]=0x%08x\n",
- i, out1[i], i, out2[i]);
- failed = 1;
- }
+ printf("RGBToRGB-SSE FAIL: out1[%d]=0x%08x out2[%d]=0x%08x\n",
+ i, out1[i], i, out2[i]);
+ failed = TRUE;
}
}
-#endif /* i386 */
-
- if (!failed) printf("All RGBToRGB_16s8u_P3AC4R tests passed (%s).\n", testStr);
-
- return (failed > 0) ? FAILURE : SUCCESS;
+ return !failed;
}
/* ------------------------------------------------------------------------- */
-static const prim_size_t roi64x64 = { 64, 64 };
-STD_SPEED_TEST(
- rgb_to_argb_speed, INT16*, UINT32, dst = dst,
- TRUE, generic->RGBToRGB_16s8u_P3AC4R(
- (const INT16**) src1, 64 * 2, (BYTE*) dst, 64 * 4, &roi64x64),
-#ifdef WITH_SSE2
- TRUE, sse2_RGBToRGB_16s8u_P3AC4R(
- (const INT16**) src1, 64 * 2, (BYTE*) dst, 64 * 4, &roi64x64),
- PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
-#else
- FALSE, PRIM_NOP, 0, FALSE,
-#endif
- FALSE, dst = dst);
-
-int test_RGBToRGB_16s8u_P3AC4R_speed(void)
+static BOOL test_RGBToRGB_16s8u_P3AC4R_speed(void)
{
- INT16 ALIGN(r[4096]), ALIGN(g[4096]), ALIGN(b[4096]);
- UINT32 ALIGN(dst[4096]);
+ const prim_size_t roi64x64 = { 64, 64 };
+ INT16 ALIGN(r[4096+1]), ALIGN(g[4096+1]), ALIGN(b[4096+1]);
+ UINT32 ALIGN(dst[4096+1]);
int i;
INT16* ptrs[3];
- int size_array[] = { 64 };
+
winpr_RAND((BYTE*)r, sizeof(r));
winpr_RAND((BYTE*)g, sizeof(g));
winpr_RAND((BYTE*)b, sizeof(b));
b[i] &= 0x00FFU;
}
- ptrs[0] = r;
- ptrs[1] = g;
- ptrs[2] = b;
- rgb_to_argb_speed("RGBToARGB", "aligned",
- (const INT16**) ptrs, NULL, 0, dst,
- size_array, 1, RGB_TRIAL_ITERATIONS, TEST_TIME);
- return SUCCESS;
+ ptrs[0] = r+1;
+ ptrs[1] = g+1;
+ ptrs[2] = b+1;
+
+ if (!speed_test("RGBToRGB_16s8u_P3AC4R", "aligned", g_Iterations,
+ (speed_test_fkt)generic->RGBToRGB_16s8u_P3AC4R,
+ (speed_test_fkt)optimized->RGBToRGB_16s8u_P3AC4R,
+ (const INT16**) ptrs, 64 * 2, (BYTE*) dst, 64 * 4, &roi64x64))
+ return FALSE;
+
+ if (!speed_test("RGBToRGB_16s8u_P3AC4R", "unaligned", g_Iterations,
+ (speed_test_fkt)generic->RGBToRGB_16s8u_P3AC4R,
+ (speed_test_fkt)optimized->RGBToRGB_16s8u_P3AC4R,
+ (const INT16**) ptrs, 64 * 2, ((BYTE*) dst)+1, 64 * 4, &roi64x64))
+ return FALSE;
+
+ return TRUE;
}
/* ========================================================================= */
-int test_yCbCrToRGB_16s16s_P3P3_func(void)
+static BOOL test_yCbCrToRGB_16s16s_P3P3_func(void)
{
+ pstatus_t status;
INT16 ALIGN(y[4096]), ALIGN(cb[4096]), ALIGN(cr[4096]);
INT16 ALIGN(r1[4096]), ALIGN(g1[4096]), ALIGN(b1[4096]);
INT16 ALIGN(r2[4096]), ALIGN(g2[4096]), ALIGN(b2[4096]);
int i;
- int failed = 0;
- char testStr[256];
const INT16* in[3];
INT16* out1[3];
INT16* out2[3];
prim_size_t roi = { 64, 64 };
- testStr[0] = '\0';
+
winpr_RAND((BYTE*)y, sizeof(y));
winpr_RAND((BYTE*)cb, sizeof(cb));
winpr_RAND((BYTE*)cr, sizeof(cr));
out2[0] = r2;
out2[1] = g2;
out2[2] = b2;
- generic->yCbCrToRGB_16s16s_P3P3(in, 64 * 2, out1, 64 * 2, &roi);
-#ifdef WITH_SSE2
- if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE))
- {
- strcat(testStr, " SSE2");
- sse2_yCbCrToRGB_16s16s_P3P3(in, 64 * 2, out2, 64 * 2, &roi);
+ status = generic->yCbCrToRGB_16s16s_P3P3(in, 64 * 2, out1, 64 * 2, &roi);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
- for (i = 0; i < 4096; ++i)
+ status = optimized->yCbCrToRGB_16s16s_P3P3(in, 64 * 2, out2, 64 * 2, &roi);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ for (i = 0; i < 4096; ++i)
+ {
+ if ((ABS(r1[i] - r2[i]) > 1)
+ || (ABS(g1[i] - g2[i]) > 1)
+ || (ABS(b1[i] - b2[i]) > 1))
{
- if ((ABS(r1[i] - r2[i]) > 1)
- || (ABS(g1[i] - g2[i]) > 1)
- || (ABS(b1[i] - b2[i]) > 1))
- {
- printf("YCbCrToRGB-SSE FAIL[%d]: %d,%d,%d vs %d,%d,%d\n", i,
- r1[i], g1[i], b1[i], r2[i], g2[i], b2[i]);
- failed = 1;
- }
+ printf("YCbCrToRGB-SSE FAIL[%d]: %d,%d,%d vs %d,%d,%d\n", i,
+ r1[i], g1[i], b1[i], r2[i], g2[i], b2[i]);
+ return FALSE;
}
}
-#endif /* i386 */
-
- if (!failed) printf("All yCbCrToRGB_16s16s_P3P3 tests passed (%s).\n", testStr);
-
- return (failed > 0) ? FAILURE : SUCCESS;
+ return TRUE;
}
/* ------------------------------------------------------------------------- */
-STD_SPEED_TEST(
- ycbcr_to_rgb_speed, INT16*, INT16*, dst = dst,
- TRUE, generic->yCbCrToRGB_16s16s_P3P3(src1, 64 * 2, dst, 64 * 2, &roi64x64),
-#ifdef WITH_SSE2
- TRUE, sse2_yCbCrToRGB_16s16s_P3P3(src1, 64 * 2, dst, 64 * 2, &roi64x64),
- PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
-#elif defined(WITH_NEON)
- TRUE, neon_yCbCrToRGB_16s16s_P3P3(src1, 64 * 2, dst, 64 * 2, &roi64x64),
- PF_ARM_NEON_INSTRUCTIONS_AVAILABLE, FALSE,
-#else
- FALSE, PRIM_NOP, 0, FALSE,
-#endif
- FALSE, dst = dst);
-
static int test_yCbCrToRGB_16s16s_P3P3_speed(void)
{
+ prim_size_t roi = { 64, 64 };
INT16 ALIGN(y[4096]), ALIGN(cb[4096]), ALIGN(cr[4096]);
INT16 ALIGN(r[4096]), ALIGN(g[4096]), ALIGN(b[4096]);
int i;
const INT16* input[3];
INT16* output[3];
- int size_array[] = { 64 };
+
winpr_RAND((BYTE*)y, sizeof(y));
winpr_RAND((BYTE*)cb, sizeof(cb));
winpr_RAND((BYTE*)cr, sizeof(cr));
output[0] = r;
output[1] = g;
output[2] = b;
- ycbcr_to_rgb_speed("yCbCrToRGB", "aligned", input, NULL, NULL, output,
- size_array, 1, YCBCR_TRIAL_ITERATIONS, TEST_TIME);
- return SUCCESS;
+
+ if (!speed_test("yCbCrToRGB_16s16s_P3P3", "aligned", g_Iterations,
+ (speed_test_fkt)generic->yCbCrToRGB_16s16s_P3P3,
+ (speed_test_fkt)optimized->yCbCrToRGB_16s16s_P3P3,
+ input, 64 * 2, output, 64 * 2, &roi))
+ return FALSE;
+
+ return TRUE;
}
int TestPrimitivesColors(int argc, char* argv[])
{
- int status;
- status = test_RGBToRGB_16s8u_P3AC4R_func();
+ prim_test_setup(FALSE);
- if (status != SUCCESS)
+ if (!test_RGBToRGB_16s8u_P3AC4R_func())
return 1;
if (g_TestPrimitivesPerformance)
{
- status = test_RGBToRGB_16s8u_P3AC4R_speed();
-
- if (status != SUCCESS)
+ if (!test_RGBToRGB_16s8u_P3AC4R_speed())
return 1;
}
- status = test_yCbCrToRGB_16s16s_P3P3_func();
-
- if (status != SUCCESS)
+ if (!test_yCbCrToRGB_16s16s_P3P3_func())
return 1;
if (g_TestPrimitivesPerformance)
{
- status = test_yCbCrToRGB_16s16s_P3P3_speed();
-
- if (status != SUCCESS)
+ if (!test_yCbCrToRGB_16s16s_P3P3_speed())
return 1;
}
#include <winpr/sysinfo.h>
#include "prim_test.h"
-static const int MEMCPY_PRETEST_ITERATIONS = 1000000;
-static const int TEST_TIME = 1.0; // seconds
#define COPY_TESTSIZE (256*2+16*2+15+15)
/* ------------------------------------------------------------------------- */
-static int test_copy8u_func(void)
+static BOOL test_copy8u_func(void)
{
primitives_t* prims = primitives_get();
BYTE ALIGN(data[COPY_TESTSIZE + 15]);
int i, soff;
- int failed = 0;
- char testStr[256];
BYTE ALIGN(dest[COPY_TESTSIZE + 15]);
- testStr[0] = '\0';
+
winpr_RAND(data, sizeof(data));
- strcat(testStr, " ptr");
for (soff = 0; soff < 16; ++soff)
{
for (length = 1; length <= COPY_TESTSIZE - doff; ++length)
{
memset(dest, 0, sizeof(dest));
- prims->copy_8u(data + soff, dest + doff, length);
+ if (prims->copy_8u(data + soff, dest + doff, length) != PRIMITIVES_SUCCESS)
+ return FALSE;
for (i = 0; i < length; ++i)
{
"data[%d]=0x%02x\n",
doff, length, i + doff, dest[i + doff],
i + soff, data[i + soff]);
- failed = 1;
+ return FALSE;
}
}
}
}
}
- if (!failed) printf("All copy8 tests passed (%s).\n", testStr);
-
- return (failed > 0) ? FAILURE : SUCCESS;
+ return TRUE;
}
/* ------------------------------------------------------------------------- */
-STD_SPEED_TEST(copy8u_speed_test, BYTE, BYTE, dst = dst,
- TRUE, memcpy(dst, src1, size),
- FALSE, PRIM_NOP, 0, FALSE,
- TRUE, ippsCopy_8u(src1, dst, size));
-
-int test_copy8u_speed(void)
+static BOOL test_copy8u_speed(void)
{
BYTE ALIGN(src[MAX_TEST_SIZE + 4]);
BYTE ALIGN(dst[MAX_TEST_SIZE + 4]);
- copy8u_speed_test("copy8u", "aligned", src, NULL, 0, dst,
- test_sizes, NUM_TEST_SIZES, MEMCPY_PRETEST_ITERATIONS, TEST_TIME);
- copy8u_speed_test("copy8u", "unaligned", src + 1, NULL, 0, dst,
- test_sizes, NUM_TEST_SIZES, MEMCPY_PRETEST_ITERATIONS, TEST_TIME);
- return SUCCESS;
+
+ if (!speed_test("copy_8u", "aligned", g_Iterations,
+ (speed_test_fkt)generic->copy_8u,
+ (speed_test_fkt)optimized->copy_8u,
+ src, dst, MAX_TEST_SIZE))
+ return FALSE;
+
+ if (!speed_test("copy_8u", "unaligned", g_Iterations,
+ (speed_test_fkt)generic->copy_8u,
+ (speed_test_fkt)optimized->copy_8u,
+ src+1, dst+1, MAX_TEST_SIZE))
+ return FALSE;
+
+ return TRUE;
}
int TestPrimitivesCopy(int argc, char* argv[])
{
- int status;
- status = test_copy8u_func();
+ prim_test_setup(FALSE);
- if (status != SUCCESS)
+ if (!test_copy8u_func())
return 1;
if (g_TestPrimitivesPerformance)
{
- status = test_copy8u_speed();
-
- if (status != SUCCESS)
+ if (!test_copy8u_speed())
return 1;
}
{
UINT32 len;
- memset(dest, 0, sizeof(dest));
+ memset(dest, 3, sizeof(dest));
for (len = 1; len < 48 - off; ++len)
{
status = generic->set_8u(0xa5, dest + off, len);
if (status != PRIMITIVES_SUCCESS)
return FALSE;
- if (!check8(dest, len, off, 0xa8))
+ if (!check8(dest, len, off, 0xa5))
return FALSE;
}
}
{
UINT32 len;
- memset(dest, 0, sizeof(dest));
+ memset(dest, 3, sizeof(dest));
for (len = 1; len < 48 - off; ++len)
{
status = optimized->set_8u(0xa5, dest + off, len);
if (status != PRIMITIVES_SUCCESS)
return FALSE;
- if (!check8(dest, len, off, 0xa8))
+ if (!check8(dest, len, off, 0xa5))
return FALSE;
}
}
{
winpr_RAND(&value, sizeof(value));
if (!speed_test("set_8u", "", g_Iterations,
- generic->set_8u, optimized->set_8u,
- value, dest + x, len))
+ (speed_test_fkt)generic->set_8u,
+ (speed_test_fkt)optimized->set_8u,
+ value, dest + x, x))
return FALSE;
}
{
winpr_RAND(&value, sizeof(value));
if (!speed_test("set_32u", "", g_Iterations,
- generic->set_32u, optimized->set_32u,
- value, dest + x, len))
+ (speed_test_fkt)generic->set_32u,
+ (speed_test_fkt)optimized->set_32u,
+ value, dest + x, x))
return FALSE;
}
{
winpr_RAND(&value, sizeof(value));
if (!speed_test("set_32s", "", g_Iterations,
- generic->set_32s, optimized->set_32s,
- value, dest + x, len))
+ (speed_test_fkt)generic->set_32s,
+ (speed_test_fkt)optimized->set_32s,
+ value, dest + x, x))
return FALSE;
}
if (!test_set8u_func())
return -1;
-
- if (!test_set8u_speed())
- return -1;
-
if (!test_set32s_func())
return -1;
-
- if (!test_set32s_speed())
- return -1;
-
if (!test_set32u_func())
return -1;
- if (!test_set32u_speed())
- return -1;
+ if (g_TestPrimitivesPerformance)
+ {
+ if (!test_set8u_speed())
+ return -1;
+ if (!test_set32s_speed())
+ return -1;
+ if (!test_set32u_speed())
+ return -1;
+ }
return 0;
}
#include "prim_test.h"
#define FUNC_TEST_SIZE 65536
-static const int SHIFT_PRETEST_ITERATIONS = 50000;
-static const float TEST_TIME = 1.0;
-
-#ifdef WITH_SSE2
-#define SHIFT_TEST_FUNC(_name_, _type_, _str_, _f1_, _f2_) \
- int _name_(void) \
- { \
- _type_ ALIGN(src[FUNC_TEST_SIZE+3]), \
- ALIGN(d1[FUNC_TEST_SIZE+3]), ALIGN(d2[FUNC_TEST_SIZE+3]); \
- int failed = 0; \
- int i; \
- char testStr[256]; \
- testStr[0] = '\0'; \
- get_random_data(src, sizeof(src)); \
- _f1_(src+1, 3, d1+1, FUNC_TEST_SIZE); \
- if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) \
- { \
- strcat(testStr, " SSE3"); \
- /* Aligned */ \
- _f2_(src+1, 3, d2+1, FUNC_TEST_SIZE); \
- for (i=1; i<=FUNC_TEST_SIZE; ++i) \
- { \
- if (d1[i] != d2[i]) \
- { \
- printf("%s-SSE-aligned FAIL[%d]: 0x%x>>3=0x%x, got 0x%x\n", \
- _str_, i, src[i], d1[i], d2[i]); \
- ++failed; \
- } \
- } \
- /* Unaligned */ \
- _f2_(src+1, 3, d2+2, FUNC_TEST_SIZE); \
- for (i=1; i<=FUNC_TEST_SIZE; ++i) \
- { \
- if (d1[i] != d2[i+1]) \
- { \
- printf("%s-SSE-unaligned FAIL[%d]: 0x%x>>3=0x%x, got 0x%x\n", \
- _str_, i, src[i], d1[i], d2[i+1]); \
- ++failed; \
- } \
- } \
- } \
- if (!failed) printf("All %s tests passed (%s).\n", _str_, testStr); \
- return (failed > 0) ? FAILURE : SUCCESS; \
- }
-#else
-#define SHIFT_TEST_FUNC(_name_, _type_, _str_, _f1_, _f2_) \
- int _name_(void) \
- { \
- return SUCCESS; \
- }
-#endif /* i386 */
-
-SHIFT_TEST_FUNC(test_lShift_16s_func, INT16, "lshift_16s", general_lShiftC_16s,
- sse2_lShiftC_16s)
-SHIFT_TEST_FUNC(test_lShift_16u_func, UINT16, "lshift_16u", general_lShiftC_16u,
- sse2_lShiftC_16u)
-SHIFT_TEST_FUNC(test_rShift_16s_func, INT16, "rshift_16s", general_rShiftC_16s,
- sse2_rShiftC_16s)
-SHIFT_TEST_FUNC(test_rShift_16u_func, UINT16, "rshift_16u", general_rShiftC_16u,
- sse2_rShiftC_16u)
-
-/* ========================================================================= */
-STD_SPEED_TEST(speed_lShift_16s, INT16, INT16, dst = dst,
- TRUE, general_lShiftC_16s(src1, constant, dst, size),
-#ifdef WITH_SSE2
- TRUE, sse2_lShiftC_16s(src1, constant, dst, size),
- PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
-#else
- FALSE, PRIM_NOP, 0, FALSE,
-#endif
- TRUE, ippsLShiftC_16s(src1, constant, dst, size));
-STD_SPEED_TEST(speed_lShift_16u, UINT16, UINT16, dst = dst,
- TRUE, general_lShiftC_16u(src1, constant, dst, size),
-#ifdef WITH_SSE2
- TRUE, sse2_lShiftC_16u(src1, constant, dst, size),
- PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
-#else
- FALSE, PRIM_NOP, 0, FALSE,
-#endif
- TRUE, ippsLShiftC_16u(src1, constant, dst, size));
-STD_SPEED_TEST(speed_rShift_16s, INT16, INT16, dst = dst,
- TRUE, general_rShiftC_16s(src1, constant, dst, size),
-#ifdef WITH_SSE2
- TRUE, sse2_rShiftC_16s(src1, constant, dst, size),
- PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
-#else
- FALSE, PRIM_NOP, 0, FALSE,
-#endif
- TRUE, ippsRShiftC_16s(src1, constant, dst, size));
-STD_SPEED_TEST(speed_rShift_16u, UINT16, UINT16, dst = dst,
- TRUE, general_rShiftC_16u(src1, constant, dst, size),
-#ifdef WITH_SSE2
- TRUE, sse2_rShiftC_16u(src1, constant, dst, size),
- PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
-#else
- FALSE, PRIM_NOP, 0, FALSE,
-#endif
- TRUE, ippsRShiftC_16u(src1, constant, dst, size));
+
+static BOOL test_lShift_16s_func(void)
+{
+ pstatus_t status;
+ INT16 ALIGN(src[FUNC_TEST_SIZE+3]);
+ INT16 ALIGN(d1[FUNC_TEST_SIZE+3]);
+ UINT32 val;
+
+ winpr_RAND((BYTE*)&val, sizeof(val));
+ winpr_RAND((BYTE*)src, sizeof(src));
+
+ val = (val % (FUNC_TEST_SIZE - 1)) + 1;
+
+ /* Aligned */
+ status = generic->lShiftC_16s(src+1, val, d1+1, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->lShiftC_16s(src+1, val, d1+1, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ /* Unaligned */
+ status = generic->lShiftC_16s(src+1, val, d1+2, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->lShiftC_16s(src+1, val, d1+2, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ return TRUE;
+}
+
+static BOOL test_lShift_16u_func(void)
+{
+ pstatus_t status;
+ UINT16 ALIGN(src[FUNC_TEST_SIZE+3]);
+ UINT16 ALIGN(d1[FUNC_TEST_SIZE+3]);
+ UINT32 val;
+
+ winpr_RAND((BYTE*)&val, sizeof(val));
+ winpr_RAND((BYTE*)src, sizeof(src));
+
+ val = (val % (FUNC_TEST_SIZE - 1)) + 1;
+
+ /* Aligned */
+ status = generic->lShiftC_16u(src+1, val, d1+1, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->lShiftC_16u(src+1, val, d1+1, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ /* Unaligned */
+ status = generic->lShiftC_16u(src+1, val, d1+2, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->lShiftC_16u(src+1, val, d1+2, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ return TRUE;
+}
+
+static BOOL test_rShift_16s_func(void)
+{
+ pstatus_t status;
+ INT16 ALIGN(src[FUNC_TEST_SIZE+3]);
+ INT16 ALIGN(d1[FUNC_TEST_SIZE+3]);
+ UINT32 val;
+
+ winpr_RAND((BYTE*)&val, sizeof(val));
+ winpr_RAND((BYTE*)src, sizeof(src));
+
+ val = (val % (FUNC_TEST_SIZE - 1)) + 1;
+
+ /* Aligned */
+ status = generic->rShiftC_16s(src+1, val, d1+1, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->rShiftC_16s(src+1, val, d1+1, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ /* Unaligned */
+ status = generic->rShiftC_16s(src+1, val, d1+2, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->rShiftC_16s(src+1, val, d1+2, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ return TRUE;
+}
+
+static BOOL test_rShift_16u_func(void)
+{
+ pstatus_t status;
+ UINT16 ALIGN(src[FUNC_TEST_SIZE+3]);
+ UINT16 ALIGN(d1[FUNC_TEST_SIZE+3]);
+ UINT32 val;
+
+ winpr_RAND((BYTE*)&val, sizeof(val));
+ winpr_RAND((BYTE*)src, sizeof(src));
+
+ val = (val % (FUNC_TEST_SIZE - 1)) + 1;
+
+ /* Aligned */
+ status = generic->rShiftC_16u(src+1, val, d1+1, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->rShiftC_16u(src+1, val, d1+1, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ /* Unaligned */
+ status = generic->rShiftC_16u(src+1, val, d1+2, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->rShiftC_16u(src+1, val, d1+2, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ return TRUE;
+}
+
+static BOOL test_ShiftWrapper_16s_func(void)
+{
+ pstatus_t status;
+ INT16 ALIGN(src[FUNC_TEST_SIZE+3]);
+ INT16 ALIGN(d1[FUNC_TEST_SIZE+3]);
+ UINT32 tmp;
+ INT32 val;
+
+ winpr_RAND((BYTE*)&tmp, sizeof(tmp));
+ winpr_RAND((BYTE*)src, sizeof(src));
+
+ val = (tmp % (FUNC_TEST_SIZE - 1)) + 1;
+
+ /* Aligned */
+ status = generic->shiftC_16s(src+1, val, d1+1, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->shiftC_16s(src+1, val, d1+1, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ status = generic->shiftC_16s(src+1, -val, d1+1, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->shiftC_16s(src+1, -val, d1+1, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ /* Unaligned */
+ status = generic->shiftC_16s(src+1, val, d1+2, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->shiftC_16s(src+1, val, d1+2, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = generic->shiftC_16s(src+1, -val, d1+2, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->shiftC_16s(src+1, -val, d1+2, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ return TRUE;
+}
+
+static BOOL test_ShiftWrapper_16u_func(void)
+{
+ pstatus_t status;
+ UINT16 ALIGN(src[FUNC_TEST_SIZE+3]);
+ UINT16 ALIGN(d1[FUNC_TEST_SIZE+3]);
+ UINT32 tmp;
+ INT32 val;
+
+ winpr_RAND((BYTE*)&tmp, sizeof(tmp));
+ winpr_RAND((BYTE*)src, sizeof(src));
+
+ val = (tmp % (FUNC_TEST_SIZE - 1)) + 1;
+
+ /* Aligned */
+ status = generic->shiftC_16u(src+1, val, d1+1, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->shiftC_16u(src+1, val, d1+1, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ status = generic->shiftC_16u(src+1, -val, d1+1, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->shiftC_16u(src+1, -val, d1+1, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ /* Unaligned */
+ status = generic->shiftC_16u(src+1, val, d1+2, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->shiftC_16u(src+1, val, d1+2, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = generic->shiftC_16u(src+1, -val, d1+2, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->shiftC_16u(src+1, -val, d1+2, FUNC_TEST_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ return TRUE;
+}
/* ------------------------------------------------------------------------- */
-int test_lShift_16s_speed(void)
+static BOOL test_lShift_16s_speed(void)
{
- INT16 ALIGN(src[MAX_TEST_SIZE + 1]), ALIGN(dst[MAX_TEST_SIZE + 1]);
- winpr_RAND(src, sizeof(src));
- speed_lShift_16s("lShift_16s", "aligned", src, NULL, 3, dst,
- test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
- speed_lShift_16s("lShift_16s", "unaligned", src + 1, NULL, 3, dst,
- test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
- return SUCCESS;
+ UINT32 val;
+ INT16 ALIGN(src[MAX_TEST_SIZE+1]), ALIGN(dst[MAX_TEST_SIZE+1]);
+
+ winpr_RAND((BYTE*)src, sizeof(src));
+ winpr_RAND((BYTE*)&val, sizeof(val));
+
+ if (!speed_test("lShift_16s", "aligned", g_Iterations,
+ (speed_test_fkt)generic->lShiftC_16s,
+ (speed_test_fkt)optimized->lShiftC_16s, src, val,
+ dst, MAX_TEST_SIZE))
+ return FALSE;
+
+ if (!speed_test("lShift_16s", "unaligned", g_Iterations,
+ (speed_test_fkt)generic->lShiftC_16s,
+ (speed_test_fkt)optimized->lShiftC_16s, src + 1, val,
+ dst, MAX_TEST_SIZE))
+ return FALSE;
+
+ return TRUE;
}
/* ------------------------------------------------------------------------- */
-int test_lShift_16u_speed(void)
+static BOOL test_lShift_16u_speed(void)
{
+ UINT32 val;
UINT16 ALIGN(src[MAX_TEST_SIZE + 1]), ALIGN(dst[MAX_TEST_SIZE + 1]);
- winpr_RAND(src, sizeof(src));
- speed_lShift_16u("lShift_16u", "aligned", src, NULL, 3, dst,
- test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
- speed_lShift_16u("lShift_16u", "unaligned", src + 1, NULL, 3, dst,
- test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
- return SUCCESS;
+
+ winpr_RAND((BYTE*)&val, sizeof(val));
+ winpr_RAND((BYTE*)src, sizeof(src));
+
+ if (!speed_test("lShift_16u", "aligned", g_Iterations,
+ (speed_test_fkt)generic->lShiftC_16u,
+ (speed_test_fkt)optimized->lShiftC_16u, src, val,
+ dst, MAX_TEST_SIZE))
+ return FALSE;
+
+ if (!speed_test("lShift_16u", "unaligned", g_Iterations,
+ (speed_test_fkt)generic->lShiftC_16u,
+ (speed_test_fkt)optimized->lShiftC_16u, src + 1, val,
+ dst, MAX_TEST_SIZE))
+ return FALSE;
+
+ return TRUE;
}
/* ------------------------------------------------------------------------- */
-int test_rShift_16s_speed(void)
+static BOOL test_rShift_16s_speed(void)
{
- INT16 ALIGN(src[MAX_TEST_SIZE + 1]), ALIGN(dst[MAX_TEST_SIZE + 1]);
- winpr_RAND(src, sizeof(src));
- speed_rShift_16s("rShift_16s", "aligned", src, NULL, 3, dst,
- test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
- speed_rShift_16s("rShift_16s", "unaligned", src + 1, NULL, 3, dst,
- test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
- return SUCCESS;
+ UINT32 val;
+ INT16 ALIGN(src[MAX_TEST_SIZE+1]), ALIGN(dst[MAX_TEST_SIZE+1]);
+
+ winpr_RAND((BYTE*)src, sizeof(src));
+ winpr_RAND((BYTE*)&val, sizeof(val));
+ if (!speed_test("rShift_16s", "aligned", g_Iterations,
+ (speed_test_fkt)generic->rShiftC_16s,
+ (speed_test_fkt)optimized->rShiftC_16s, src, val,
+ dst, MAX_TEST_SIZE))
+ return FALSE;
+
+ if (!speed_test("rShift_16s", "unaligned", g_Iterations,
+ (speed_test_fkt)generic->rShiftC_16s,
+ (speed_test_fkt)optimized->rShiftC_16s, src + 1, val,
+ dst, MAX_TEST_SIZE))
+ return FALSE;
+
+ return TRUE;
}
/* ------------------------------------------------------------------------- */
-int test_rShift_16u_speed(void)
+static BOOL test_rShift_16u_speed(void)
{
+ UINT32 val;
UINT16 ALIGN(src[MAX_TEST_SIZE + 1]), ALIGN(dst[MAX_TEST_SIZE + 1]);
- winpr_RAND(src, sizeof(src));
- speed_rShift_16u("rShift_16u", "aligned", src, NULL, 3, dst,
- test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
- speed_rShift_16u("rShift_16u", "unaligned", src + 1, NULL, 3, dst,
- test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
- return SUCCESS;
+
+ winpr_RAND((BYTE*)&val, sizeof(val));
+ winpr_RAND((BYTE*)src, sizeof(src));
+
+ if (!speed_test("rShift_16u", "aligned", g_Iterations,
+ (speed_test_fkt)generic->rShiftC_16u,
+ (speed_test_fkt)optimized->rShiftC_16u, src, val,
+ dst, MAX_TEST_SIZE))
+ return FALSE;
+
+ if (!speed_test("rShift_16u", "unaligned", g_Iterations,
+ (speed_test_fkt)generic->rShiftC_16u,
+ (speed_test_fkt)optimized->rShiftC_16u, src + 1, val,
+ dst, MAX_TEST_SIZE))
+ return FALSE;
+
+ return TRUE;
}
int TestPrimitivesShift(int argc, char* argv[])
{
- int status;
- status = test_lShift_16s_func();
+ prim_test_setup(FALSE);
- if (status != SUCCESS)
+ if (!test_lShift_16s_func())
return 1;
if (g_TestPrimitivesPerformance)
{
- status = test_lShift_16s_speed();
-
- if (status != SUCCESS)
+ if (!test_lShift_16s_speed())
return 1;
}
- status = test_lShift_16u_func();
-
- if (status != SUCCESS)
+ if (!test_lShift_16u_func())
return 1;
if (g_TestPrimitivesPerformance)
{
- status = test_lShift_16u_speed();
-
- if (status != SUCCESS)
+ if (!test_lShift_16u_speed())
return 1;
}
- status = test_rShift_16s_func();
-
- if (status != SUCCESS)
+ if (!test_rShift_16s_func())
return 1;
if (g_TestPrimitivesPerformance)
{
- status = test_rShift_16s_speed();
-
- if (status != SUCCESS)
+ if (!test_rShift_16s_speed())
return 1;
}
- status = test_rShift_16u_func();
-
- if (status != SUCCESS)
+ if (!test_rShift_16u_func())
return 1;
if (g_TestPrimitivesPerformance)
{
- status = test_rShift_16u_speed();
-
- if (status != SUCCESS)
+ if (!test_rShift_16u_speed())
return 1;
}
+ if (!test_ShiftWrapper_16s_func())
+ return 1;
+
+ if (!test_ShiftWrapper_16u_func())
+ return 1;
+
return 0;
}
#include <winpr/sysinfo.h>
#include "prim_test.h"
-static const int SIGN_PRETEST_ITERATIONS = 100000;
-static const float TEST_TIME = 1.0;
+#define TEST_BUFFER_SIZE 65535
/* ------------------------------------------------------------------------- */
-static int test_sign16s_func(void)
+static BOOL test_sign16s_func(void)
{
- INT16 ALIGN(src[65535]), ALIGN(d1[65535]);
-#ifdef WITH_SSE2
- INT16 ALIGN(d2[65535]);
- int i;
-#endif
- int failed = 0;
- char testStr[256];
- /* Test when we can reach 16-byte alignment */
- testStr[0] = '\0';
- winpr_RAND(src, sizeof(src));
- general_sign_16s(src + 1, d1 + 1, 65535);
-#ifdef WITH_SSE2
-
- if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
- {
- strcat(testStr, " SSSE3");
- ssse3_sign_16s(src + 1, d2 + 1, 65535);
-
- for (i = 1; i < 65535; ++i)
- {
- if (d1[i] != d2[i])
- {
- printf("SIGN16s-SSE-aligned FAIL[%d] of %d: want %d, got %d\n",
- i, src[i], d1[i], d2[i]);
- ++failed;
- }
- }
- }
-
-#endif /* i386 */
- /* Test when we cannot reach 16-byte alignment */
- winpr_RAND(src, sizeof(src));
- general_sign_16s(src + 1, d1 + 2, 65535);
-#ifdef WITH_SSE2
-
- if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
- {
- ssse3_sign_16s(src + 1, d2 + 2, 65535);
-
- for (i = 2; i < 65535; ++i)
- {
- if (d1[i] != d2[i])
- {
- printf("SIGN16s-SSE-unaligned FAIL[%d] of %d: want %d, got %d\n",
- i, src[i - 1], d1[i], d2[i]);
- ++failed;
- }
- }
- }
-
-#endif /* i386 */
-
- if (!failed) printf("All sign16s tests passed (%s).\n", testStr);
-
- return (failed > 0) ? FAILURE : SUCCESS;
+ pstatus_t status;
+ INT16 ALIGN(src[TEST_BUFFER_SIZE]);
+ INT16 ALIGN(d1[TEST_BUFFER_SIZE]);
+ INT16 ALIGN(d2[TEST_BUFFER_SIZE]);
+
+ winpr_RAND((BYTE*)src, sizeof(src));
+
+ status = generic->sign_16s(src + 1, d1 + 1, TEST_BUFFER_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->sign_16s(src + 1, d2 + 1, TEST_BUFFER_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ if (memcmp(d1, d2, sizeof(d1)) != 0)
+ return FALSE;
+
+ status = generic->sign_16s(src + 1, d1 + 2, TEST_BUFFER_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->sign_16s(src + 1, d2 + 2, TEST_BUFFER_SIZE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ if (memcmp(d1, d2, sizeof(d1)) != 0)
+ return FALSE;
+
+ return TRUE;
}
-/* ------------------------------------------------------------------------- */
-STD_SPEED_TEST(sign16s_speed_test, INT16, INT16, dst = dst,
- TRUE, general_sign_16s(src1, dst, size),
-#ifdef WITH_SSE2
- TRUE, ssse3_sign_16s(src1, dst, size), PF_EX_SSSE3, TRUE,
-#else
- FALSE, PRIM_NOP, 0, FALSE,
-#endif
- FALSE, dst = dst);
-
static int test_sign16s_speed(void)
{
INT16 ALIGN(src[MAX_TEST_SIZE + 3]), ALIGN(dst[MAX_TEST_SIZE + 3]);
- winpr_RAND(src, sizeof(src));
- sign16s_speed_test("sign16s", "aligned", src, NULL, 0, dst,
- test_sizes, NUM_TEST_SIZES, SIGN_PRETEST_ITERATIONS, TEST_TIME);
- sign16s_speed_test("sign16s", "unaligned", src + 1, NULL, 0, dst,
- test_sizes, NUM_TEST_SIZES, SIGN_PRETEST_ITERATIONS, TEST_TIME);
- return SUCCESS;
+ winpr_RAND((BYTE*)src, sizeof(src));
+
+ if (!speed_test("sign16s", "aligned", g_Iterations,
+ (speed_test_fkt)generic->sign_16s,
+ (speed_test_fkt)optimized->sign_16s, src + 1, dst + 1,
+ MAX_TEST_SIZE))
+ return FALSE;
+
+ if (!speed_test("sign16s", "unaligned", g_Iterations,
+ (speed_test_fkt)generic->sign_16s,
+ (speed_test_fkt)optimized->sign_16s, src + 1, dst + 2,
+ MAX_TEST_SIZE))
+ return FALSE;
+
+ return TRUE;
}
int TestPrimitivesSign(int argc, char* argv[])
{
- int status;
- status = test_sign16s_func();
+ prim_test_setup(FALSE);
- if (status != SUCCESS)
+ if (!test_sign16s_func())
return 1;
if (g_TestPrimitivesPerformance)
{
- status = test_sign16s_speed();
-
- if (status != SUCCESS)
+ if (!test_sign16s_speed())
return 1;
}
#include <winpr/sysinfo.h>
#include "prim_test.h"
-static const int YCOCG_TRIAL_ITERATIONS = 20000;
-static const float TEST_TIME = 4.0;
-
/* ------------------------------------------------------------------------- */
-int test_YCoCgRToRGB_8u_AC4R_func(void)
+static BOOL test_YCoCgRToRGB_8u_AC4R_func(void)
{
-#ifdef WITH_SSE2
- int i;
+ BOOL result = TRUE;
+ pstatus_t status;
INT32 ALIGN(out_sse[4098]), ALIGN(out_sse_inv[4098]);
-#endif
INT32 ALIGN(in[4098]);
INT32 ALIGN(out_c[4098]), ALIGN(out_c_inv[4098]);
- char testStr[256];
- BOOL failed = FALSE;
- testStr[0] = '\0';
- winpr_RAND(in, sizeof(in));
- general_YCoCgToRGB_8u_AC4R((const BYTE*)(in + 1), 63 * 4,
- (BYTE*) out_c, 63 * 4, 63, 61, 2, TRUE, FALSE);
- general_YCoCgToRGB_8u_AC4R((const BYTE*)(in + 1), 63 * 4,
- (BYTE*) out_c_inv, 63 * 4, 63, 61, 2, TRUE, TRUE);
-#ifdef WITH_SSE2
-
- if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
+
+ UINT32 i, x;
+ const UINT32 formats[] = {
+ PIXEL_FORMAT_ARGB32,
+ PIXEL_FORMAT_ABGR32,
+ PIXEL_FORMAT_RGBA32,
+ PIXEL_FORMAT_RGBX32,
+ PIXEL_FORMAT_BGRA32,
+ PIXEL_FORMAT_BGRX32
+ };
+
+ winpr_RAND((BYTE*)in, sizeof(in));
+
+ for (x=0; x<sizeof(formats)/sizeof(formats[0]); x++)
{
- strcat(testStr, " SSSE3");
- ssse3_YCoCgRToRGB_8u_AC4R((const BYTE*)(in + 1), 63 * 4,
- (BYTE*) out_sse, 63 * 4, 63, 61, 2, TRUE, FALSE);
+ UINT32 format = formats[x];
+
+ status = generic->YCoCgToRGB_8u_AC4R(
+ (const BYTE*)(in + 1), 63 * 4,
+ (BYTE*) out_c, format, 63 * 4, 63, 61, 2, TRUE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = generic->YCoCgToRGB_8u_AC4R(
+ (const BYTE*)(in + 1), 63 * 4,
+ (BYTE*) out_c_inv, format, 63 * 4, 63, 61, 2, TRUE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+
+ status = optimized->YCoCgToRGB_8u_AC4R(
+ (const BYTE*)(in + 1), 63 * 4,
+ (BYTE*) out_sse, format, 63 * 4, 63, 61, 2, TRUE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
+ status = optimized->YCoCgToRGB_8u_AC4R(
+ (const BYTE*)(in + 1), 63 * 4,
+ (BYTE*) out_sse_inv, format, 63 * 4, 63, 61, 2, TRUE);
+ if (status != PRIMITIVES_SUCCESS)
+ return FALSE;
for (i = 0; i < 63 * 61; ++i)
{
if (out_c[i] != out_sse[i])
{
- printf("YCoCgRToRGB-SSE FAIL[%d]: 0x%08x -> C 0x%08x vs SSE 0x%08x\n", i,
+ printf("optimized->YCoCgRToRGB FAIL[%d]: 0x%08x -> C 0x%08x vs optimized 0x%08x\n", i,
in[i + 1], out_c[i], out_sse[i]);
- failed = TRUE;
+ result = FALSE;
}
}
- ssse3_YCoCgRToRGB_8u_AC4R((const BYTE*)(in + 1), 63 * 4,
- (BYTE*) out_sse_inv, 63 * 4, 63, 61, 2, TRUE, TRUE);
-
for (i = 0; i < 63 * 61; ++i)
{
if (out_c_inv[i] != out_sse_inv[i])
{
- printf("YCoCgRToRGB-SSE inverted FAIL[%d]: 0x%08x -> C 0x%08x vs SSE 0x%08x\n",
+ printf("optimized->YCoCgRToRGB inverted FAIL[%d]: 0x%08x -> C 0x%08x vs optimized 0x%08x\n",
i,
in[i + 1], out_c_inv[i], out_sse_inv[i]);
- failed = TRUE;
+ result = FALSE;
}
}
}
-
-#endif /* i386 */
-
- if (!failed) printf("All YCoCgRToRGB_8u_AC4R tests passed (%s).\n", testStr);
-
- return (failed > 0) ? FAILURE : SUCCESS;
+ return result;
}
-/* ------------------------------------------------------------------------- */
-STD_SPEED_TEST(
- ycocg_to_rgb_speed, BYTE, BYTE, PRIM_NOP,
- TRUE, general_YCoCgToRGB_8u_AC4R(src1, 64 * 4, dst, 64 * 4, 64, 64, 2, FALSE,
- FALSE),
-#ifdef WITH_SSE2
- TRUE, ssse3_YCoCgRToRGB_8u_AC4R(src1, 64 * 4, dst, 64 * 4, 64, 64, 2, FALSE,
- FALSE),
- PF_EX_SSSE3, TRUE,
-#else
- FALSE, PRIM_NOP, 0, FALSE,
-#endif
- FALSE, PRIM_NOP);
-
static int test_YCoCgRToRGB_8u_AC4R_speed(void)
{
INT32 ALIGN(in[4096]);
INT32 ALIGN(out[4096]);
- int size_array[] = { 64 };
- winpr_RAND(in, sizeof(in));
- ycocg_to_rgb_speed("YCoCgToRGB", "aligned", (const BYTE*) in,
- 0, 0, (BYTE*) out,
- size_array, 1, YCOCG_TRIAL_ITERATIONS, TEST_TIME);
- return SUCCESS;
+
+ winpr_RAND((BYTE*)in, sizeof(in));
+
+ if (!speed_test("YCoCgToRGB_8u_AC4R", "aligned", g_Iterations,
+ (speed_test_fkt)generic->YCoCgToRGB_8u_AC4R,
+ (speed_test_fkt)optimized->YCoCgToRGB_8u_AC4R,
+	                in, 64 * 4, out, PIXEL_FORMAT_BGRX32, 64 * 4, 64, 64, 2, FALSE))
+ return FALSE;
+
+ return TRUE;
}
int TestPrimitivesYCoCg(int argc, char* argv[])
{
- int status;
- status = test_YCoCgRToRGB_8u_AC4R_func();
+ prim_test_setup(FALSE);
- if (status != SUCCESS)
+ if (!test_YCoCgRToRGB_8u_AC4R_func())
return 1;
if (g_TestPrimitivesPerformance)
{
- status = test_YCoCgRToRGB_8u_AC4R_speed();
-
- if (status != SUCCESS)
+ if (!test_YCoCgRToRGB_8u_AC4R_speed())
return 1;
}
winpr_RAND((BYTE*)width, sizeof(*width));
winpr_RAND((BYTE*)height, sizeof(*height));
// TODO: Algorithm only works on even resolutions...
- *width = (*width % 4000) << 1;
- *height = (*height % 4000 << 1);
+ *width = (*width % 64) << 1;
+	*height = (*height % 64) << 1;
}
static BOOL check_padding(const BYTE* psrc, size_t size, size_t padding,
if (use444)
{
- if (prims->RGBToYUV444_8u_P3AC4R(rgb, stride, yuv, yuv_step,
+ if (prims->RGBToYUV444_8u_P3AC4R(rgb, PIXEL_FORMAT_BGRA32,
+ stride, yuv, yuv_step,
&roi) != PRIMITIVES_SUCCESS)
goto fail;
}
- else if (prims->RGBToYUV420_8u_P3AC4R(rgb, stride, yuv, yuv_step,
+ else if (prims->RGBToYUV420_8u_P3AC4R(rgb, PIXEL_FORMAT_BGRA32,
+ stride, yuv, yuv_step,
&roi) != PRIMITIVES_SUCCESS)
goto fail;
UINT32 x;
int rc = -1;
+ prim_test_setup(FALSE);
+
for (x = 0; x < 10; x++)
{
- /* TODO: This test fails on value comparison,
- * there seems to be some issue left with encoder / decoder pass.
- if (!TestPrimitiveYUV(FALSE))
- goto end;
- */
if (!TestPrimitiveYUV(TRUE))
goto end;
+ if (!TestPrimitiveYUV(FALSE))
+ goto end;
+
if (!TestPrimitiveYUVCombine())
goto end;
}
extern int test_sizes[];
#define NUM_TEST_SIZES 10
-#ifndef SUCCESS
-#define SUCCESS 0
-#endif
-#ifndef FAILURE
-#define FAILURE 1
-#endif
-
extern BOOL g_TestPrimitivesPerformance;
extern UINT32 g_Iterations;
void prim_test_setup(BOOL performance);
+typedef pstatus_t (*speed_test_fkt)();
+
BOOL speed_test(const char* name, const char* dsc, UINT32 iterations,
- pstatus_t (*generic)(), pstatus_t (*optimised)(),
+ speed_test_fkt generic, speed_test_fkt optimized,
...);
#endif // !__PRIMTEST_H_INCLUDED__