fixed a bug in the tmp buffer
authorMichael Niedermayer <michaelni@gmx.at>
Fri, 19 Oct 2001 13:41:38 +0000 (13:41 +0000)
committerMichael Niedermayer <michaelni@gmx.at>
Fri, 19 Oct 2001 13:41:38 +0000 (13:41 +0000)
fixed the color range for yuv
fixed the width %8!=0 bug (another 1% speed loss)

Originally committed as revision 2286 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc

postproc/postprocess.c
postproc/postprocess.h
postproc/postprocess_template.c

index 33ebf42..f558a1e 100644 (file)
@@ -122,7 +122,7 @@ static uint64_t temp3=0;
 static uint64_t temp4=0;
 static uint64_t temp5=0;
 static uint64_t pQPb=0;
-static uint8_t tempBlock[16*16];
+static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data
 
 int hFlatnessThreshold= 56 - 16;
 int vFlatnessThreshold= 56 - 16;
@@ -132,7 +132,7 @@ double maxClippedThreshold= 0.01;
 
 int maxAllowedY=255;
 //FIXME can never make a movie´s black brighter (anyone needs that?)
-int minAllowedY=0;
+int minAllowedY=16;
 
 #ifdef TIMING
 static inline long long rdtsc()
@@ -2398,6 +2398,13 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
        static uint8_t *tempDst= NULL;
        static uint8_t *tempSrc= NULL;
 
+       /* Temporary buffers for handling the last block */
+       static uint8_t *tempDstBlock= NULL;
+       static uint8_t *tempSrcBlock= NULL;
+
+       uint8_t *dstBlockPtrBackup;
+       uint8_t *srcBlockPtrBackup;
+
 #ifdef TIMING
        long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0;
        sumTime= rdtsc();
@@ -2407,6 +2414,8 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
        {
                tempDst= (uint8_t*)memalign(8, 1024*24);
                tempSrc= (uint8_t*)memalign(8, 1024*24);
+               tempDstBlock= (uint8_t*)memalign(8, 1024*24);
+               tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
        }
 
        if(!yHistogram)
@@ -2414,6 +2423,12 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
                int i;
                yHistogram= (uint64_t*)malloc(8*256);
                for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256;
+
+               if(mode & FULL_Y_RANGE)
+               {
+                       maxAllowedY=255;
+                       minAllowedY=0;
+               }
        }
 
        if(!isColor)
@@ -2505,6 +2520,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
                        srcBlock= tempSrc;
                }
 
+               // From this point on it is guranteed that we can read and write 16 lines downward
                // finish 1 block before the next otherwise we´ll might have a problem
                // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
                for(x=0; x<width; x+=BLOCK_SIZE)
@@ -2545,6 +2561,23 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
 
                        if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++;
 
+                       //can we mess with a 8x16 block, if not use a temp buffer, yes again
+                       if(x+7 >= width)
+                       {
+                               int i;
+                               dstBlockPtrBackup= dstBlock;
+                               srcBlockPtrBackup= srcBlock;
+
+                               for(i=0;i<BLOCK_SIZE*2; i++)
+                               {
+                                       memcpy(tempSrcBlock+i*srcStride, srcBlock+i*srcStride, width-x);
+                                       memcpy(tempDstBlock+i*dstStride, dstBlock+i*dstStride, width-x);
+                               }
+
+                               dstBlock= tempDstBlock;
+                               srcBlock= tempSrcBlock;
+                       }
+
                        blockCopy(dstBlock + dstStride*5, dstStride,
                                srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX);
 
@@ -2593,7 +2626,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
                        }
 
                        /* check if we have a previous block to deblock it with dstBlock */
-                       if(x - 8 >= 0 && x<width)
+                       if(x - 8 >= 0)
                        {
 #ifdef MORE_TIMING
                                T0= rdtsc();
@@ -2624,12 +2657,25 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
                                dering(dstBlock - stride*9 + width-9, stride, QP);
                        //FIXME dering filter will not be applied to last block (bottom right)
 
+                       /* did we use a tmp-block buffer */
+                       if(x+7 >= width)
+                       {
+                               int i;
+                               dstBlock= dstBlockPtrBackup;
+                               srcBlock= srcBlockPtrBackup;
+
+                               for(i=0;i<BLOCK_SIZE*2; i++)
+                               {
+                                       memcpy(dstBlock+i*dstStride, tempDstBlock+i*dstStride, width-x);
+                               }
+                       }
+
                        dstBlock+=8;
                        srcBlock+=8;
                }
 
                /* did we use a tmp buffer */
-               if(y+15 > height)
+               if(y+15 >= height)
                {
                        uint8_t *dstBlock= &(dst[y*dstStride]);
                        memcpy(dstBlock, tempDst, dstStride*(height-y) );
index e7eb248..20880a9 100644 (file)
@@ -46,6 +46,9 @@
 #define H_RK1_FILTER   0x1000                  // 4096 (not implemented yet)
 #define H_X1_FILTER    0x2000                  // 8192
 
+// select between full y range (255-0) or standart one (
+#define FULL_Y_RANGE   0x8000                  // 32768
+
 //Deinterlacing Filters
 #define        LINEAR_IPOL_DEINT_FILTER        0x10000 // 65536
 #define        LINEAR_BLEND_DEINT_FILTER       0x20000 // 131072
index 33ebf42..f558a1e 100644 (file)
@@ -122,7 +122,7 @@ static uint64_t temp3=0;
 static uint64_t temp4=0;
 static uint64_t temp5=0;
 static uint64_t pQPb=0;
-static uint8_t tempBlock[16*16];
+static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data
 
 int hFlatnessThreshold= 56 - 16;
 int vFlatnessThreshold= 56 - 16;
@@ -132,7 +132,7 @@ double maxClippedThreshold= 0.01;
 
 int maxAllowedY=255;
 //FIXME can never make a movie´s black brighter (anyone needs that?)
-int minAllowedY=0;
+int minAllowedY=16;
 
 #ifdef TIMING
 static inline long long rdtsc()
@@ -2398,6 +2398,13 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
        static uint8_t *tempDst= NULL;
        static uint8_t *tempSrc= NULL;
 
+       /* Temporary buffers for handling the last block */
+       static uint8_t *tempDstBlock= NULL;
+       static uint8_t *tempSrcBlock= NULL;
+
+       uint8_t *dstBlockPtrBackup;
+       uint8_t *srcBlockPtrBackup;
+
 #ifdef TIMING
        long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0;
        sumTime= rdtsc();
@@ -2407,6 +2414,8 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
        {
                tempDst= (uint8_t*)memalign(8, 1024*24);
                tempSrc= (uint8_t*)memalign(8, 1024*24);
+               tempDstBlock= (uint8_t*)memalign(8, 1024*24);
+               tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
        }
 
        if(!yHistogram)
@@ -2414,6 +2423,12 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
                int i;
                yHistogram= (uint64_t*)malloc(8*256);
                for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256;
+
+               if(mode & FULL_Y_RANGE)
+               {
+                       maxAllowedY=255;
+                       minAllowedY=0;
+               }
        }
 
        if(!isColor)
@@ -2505,6 +2520,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
                        srcBlock= tempSrc;
                }
 
+               // From this point on it is guranteed that we can read and write 16 lines downward
                // finish 1 block before the next otherwise we´ll might have a problem
                // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
                for(x=0; x<width; x+=BLOCK_SIZE)
@@ -2545,6 +2561,23 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
 
                        if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++;
 
+                       //can we mess with a 8x16 block, if not use a temp buffer, yes again
+                       if(x+7 >= width)
+                       {
+                               int i;
+                               dstBlockPtrBackup= dstBlock;
+                               srcBlockPtrBackup= srcBlock;
+
+                               for(i=0;i<BLOCK_SIZE*2; i++)
+                               {
+                                       memcpy(tempSrcBlock+i*srcStride, srcBlock+i*srcStride, width-x);
+                                       memcpy(tempDstBlock+i*dstStride, dstBlock+i*dstStride, width-x);
+                               }
+
+                               dstBlock= tempDstBlock;
+                               srcBlock= tempSrcBlock;
+                       }
+
                        blockCopy(dstBlock + dstStride*5, dstStride,
                                srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX);
 
@@ -2593,7 +2626,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
                        }
 
                        /* check if we have a previous block to deblock it with dstBlock */
-                       if(x - 8 >= 0 && x<width)
+                       if(x - 8 >= 0)
                        {
 #ifdef MORE_TIMING
                                T0= rdtsc();
@@ -2624,12 +2657,25 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
                                dering(dstBlock - stride*9 + width-9, stride, QP);
                        //FIXME dering filter will not be applied to last block (bottom right)
 
+                       /* did we use a tmp-block buffer */
+                       if(x+7 >= width)
+                       {
+                               int i;
+                               dstBlock= dstBlockPtrBackup;
+                               srcBlock= srcBlockPtrBackup;
+
+                               for(i=0;i<BLOCK_SIZE*2; i++)
+                               {
+                                       memcpy(dstBlock+i*dstStride, tempDstBlock+i*dstStride, width-x);
+                               }
+                       }
+
                        dstBlock+=8;
                        srcBlock+=8;
                }
 
                /* did we use a tmp buffer */
-               if(y+15 > height)
+               if(y+15 >= height)
                {
                        uint8_t *dstBlock= &(dst[y*dstStride]);
                        memcpy(dstBlock, tempDst, dstStride*(height-y) );