From d33d485e83e02ed7f367ee543592b053d45c62fc Mon Sep 17 00:00:00 2001 From: Alan Curry Date: Sat, 11 Feb 2006 14:16:10 +0000 Subject: [PATCH] Move the v{Y,C}CoeffsBank vectors into the SwsContext, filling them in just once when the scaler is initialized, instead of building them and freeing them over and over. This gives massive performance improvements. patch by Alan Curry, pacman*at*TheWorld*dot*com Originally committed as revision 17589 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc --- postproc/swscale.c | 25 +++++++++++++++++++++++++ postproc/swscale_internal.h | 1 + postproc/yuv2rgb_altivec.c | 28 ++-------------------------- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/postproc/swscale.c b/postproc/swscale.c index e4537f7..6f9c203 100644 --- a/postproc/swscale.c +++ b/postproc/swscale.c @@ -2110,6 +2110,25 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4, (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, srcFilter->chrV, dstFilter->chrV, c->param); + +#ifdef HAVE_ALTIVEC + c->vYCoeffsBank = memalign (16, sizeof (vector signed short)*c->vLumFilterSize*c->dstH); + c->vCCoeffsBank = memalign (16, sizeof (vector signed short)*c->vChrFilterSize*c->dstH); + + for (i=0;i<c->vLumFilterSize*c->dstH;i++) { + int j; + short *p = (short *)&c->vYCoeffsBank[i]; + for (j=0;j<8;j++) + p[j] = c->vLumFilter[i]; + } + + for (i=0;i<c->vChrFilterSize*c->dstH;i++) { + int j; + short *p = (short *)&c->vCCoeffsBank[i]; + for (j=0;j<8;j++) + p[j] = c->vChrFilter[i]; + } +#endif } // Calculate Buffer Sizes so that they won't run out while handling these damn slices @@ -2644,6 +2663,12 @@ void sws_freeContext(SwsContext *c){ c->hLumFilter = NULL; if(c->hChrFilter) free(c->hChrFilter); c->hChrFilter = NULL; +#ifdef HAVE_ALTIVEC + if(c->vYCoeffsBank) free(c->vYCoeffsBank); + c->vYCoeffsBank = NULL; + if(c->vCCoeffsBank) free(c->vCCoeffsBank); + c->vCCoeffsBank = NULL; +#endif
if(c->vLumFilterPos) free(c->vLumFilterPos); c->vLumFilterPos = NULL; diff --git a/postproc/swscale_internal.h b/postproc/swscale_internal.h index c6611da..b4e1dbe 100644 --- a/postproc/swscale_internal.h +++ b/postproc/swscale_internal.h @@ -154,6 +154,7 @@ typedef struct SwsContext{ vector signed short CGV; vector signed short OY; vector unsigned short CSHIFT; + vector signed short *vYCoeffsBank, *vCCoeffsBank; #endif diff --git a/postproc/yuv2rgb_altivec.c b/postproc/yuv2rgb_altivec.c index 2d2f776..dee68b2 100644 --- a/postproc/yuv2rgb_altivec.c +++ b/postproc/yuv2rgb_altivec.c @@ -774,8 +774,6 @@ altivec_yuv2packedX (SwsContext *c, uint8_t *dest, int dstW, int dstY) { int i,j; - short tmp __attribute__((aligned (16))); - int16_t *p; short *f; vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V; vector signed short R0,G0,B0,R1,G1,B1; @@ -787,29 +785,10 @@ altivec_yuv2packedX (SwsContext *c, vector unsigned short SCL = vec_splat((vector unsigned short)AVV(4),0); unsigned long scratch[16] __attribute__ ((aligned (16))); - vector signed short *vYCoeffsBank, *vCCoeffsBank; - vector signed short *YCoeffs, *CCoeffs; - vYCoeffsBank = memalign (16, sizeof (vector signed short)*lumFilterSize*c->dstH); - vCCoeffsBank = memalign (16, sizeof (vector signed short)*chrFilterSize*c->dstH); - - for (i=0;i<lumFilterSize*c->dstH;i++) { - tmp = c->vLumFilter[i]; - p = &vYCoeffsBank[i]; - for (j=0;j<8;j++) - p[j] = tmp; - } - - for (i=0;i<chrFilterSize*c->dstH;i++) { - tmp = c->vChrFilter[i]; - p = &vCCoeffsBank[i]; - for (j=0;j<8;j++) - p[j] = tmp; - } - - YCoeffs = vYCoeffsBank+dstY*lumFilterSize; - CCoeffs = vCCoeffsBank+dstY*chrFilterSize; + YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize; + CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize; out = (vector unsigned char *)dest; @@ -962,7 +941,4 @@ altivec_yuv2packedX (SwsContext *c, memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4); } - if (vYCoeffsBank) free (vYCoeffsBank); - if (vCCoeffsBank) free (vCCoeffsBank); - } -- 2.7.4