Move the v{Y,C}CoeffsBank vectors into the SwsContext, filling them in just
author Alan Curry <pacman@world.std.com>
Sat, 11 Feb 2006 14:16:10 +0000 (14:16 +0000)
committer Diego Biurrun <diego@biurrun.de>
Sat, 11 Feb 2006 14:16:10 +0000 (14:16 +0000)
once when the scaler is initialized, instead of building them and freeing
them over and over. This gives massive performance improvements.
patch by Alan Curry, pacman*at*TheWorld*dot*com

Originally committed as revision 17589 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc

postproc/swscale.c
postproc/swscale_internal.h
postproc/yuv2rgb_altivec.c

index e4537f7..6f9c203 100644 (file)
@@ -2110,6 +2110,25 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int
                                c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
                                (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
                                srcFilter->chrV, dstFilter->chrV, c->param);
+
+#ifdef HAVE_ALTIVEC
+               c->vYCoeffsBank = memalign (16, sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
+               c->vCCoeffsBank = memalign (16, sizeof (vector signed short)*c->vChrFilterSize*c->dstH);
+
+               for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
+                  int j;
+                 short *p = (short *)&c->vYCoeffsBank[i];
+                 for (j=0;j<8;j++)
+                   p[j] = c->vLumFilter[i];
+               }
+
+               for (i=0;i<c->vChrFilterSize*c->dstH;i++) {
+                  int j;
+                 short *p = (short *)&c->vCCoeffsBank[i];
+                 for (j=0;j<8;j++)
+                   p[j] = c->vChrFilter[i];
+               }
+#endif
        }
 
        // Calculate Buffer Sizes so that they won't run out while handling these damn slices
@@ -2644,6 +2663,12 @@ void sws_freeContext(SwsContext *c){
        c->hLumFilter = NULL;
        if(c->hChrFilter) free(c->hChrFilter);
        c->hChrFilter = NULL;
+#ifdef HAVE_ALTIVEC
+       if(c->vYCoeffsBank) free(c->vYCoeffsBank);
+       c->vYCoeffsBank = NULL;
+       if(c->vCCoeffsBank) free(c->vCCoeffsBank);
+       c->vCCoeffsBank = NULL;
+#endif
 
        if(c->vLumFilterPos) free(c->vLumFilterPos);
        c->vLumFilterPos = NULL;
index c6611da..b4e1dbe 100644 (file)
@@ -154,6 +154,7 @@ typedef struct SwsContext{
   vector signed short   CGV;
   vector signed short   OY;
   vector unsigned short CSHIFT;
+  vector signed short *vYCoeffsBank, *vCCoeffsBank;
 
 #endif
 
index 2d2f776..dee68b2 100644 (file)
@@ -774,8 +774,6 @@ altivec_yuv2packedX (SwsContext *c,
                       uint8_t *dest, int dstW, int dstY)
 {
   int i,j;
-  short tmp __attribute__((aligned (16)));
-  int16_t *p;
   short *f;
   vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
   vector signed short R0,G0,B0,R1,G1,B1;
@@ -787,29 +785,10 @@ altivec_yuv2packedX (SwsContext *c,
   vector unsigned short SCL = vec_splat((vector unsigned short)AVV(4),0);
   unsigned long scratch[16] __attribute__ ((aligned (16)));
 
-  vector signed short *vYCoeffsBank, *vCCoeffsBank;
-
   vector signed short *YCoeffs, *CCoeffs;
 
-  vYCoeffsBank = memalign (16, sizeof (vector signed short)*lumFilterSize*c->dstH);
-  vCCoeffsBank = memalign (16, sizeof (vector signed short)*chrFilterSize*c->dstH);
-
-  for (i=0;i<lumFilterSize*c->dstH;i++) {
-    tmp = c->vLumFilter[i];
-    p = &vYCoeffsBank[i];
-    for (j=0;j<8;j++)
-      p[j] = tmp;
-  }
-
-  for (i=0;i<chrFilterSize*c->dstH;i++) {
-    tmp = c->vChrFilter[i];
-    p = &vCCoeffsBank[i];
-    for (j=0;j<8;j++)
-      p[j] = tmp;
-  }
-
-  YCoeffs = vYCoeffsBank+dstY*lumFilterSize;
-  CCoeffs = vCCoeffsBank+dstY*chrFilterSize;
+  YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize;
+  CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize;
 
   out = (vector unsigned char *)dest;
 
@@ -962,7 +941,4 @@ altivec_yuv2packedX (SwsContext *c,
     memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4);
   }
 
-  if (vYCoeffsBank) free (vYCoeffsBank);
-  if (vCCoeffsBank) free (vCCoeffsBank);
-
 }