[MOVED FROM BAD 41/56] gst/deinterlace2/tvtime/tomsmocomp/: Unroll the loop to handle...
author: Sebastian Dröge <slomo@circular-chaos.org>
Tue, 26 Aug 2008 12:33:16 +0000 (12:33 +0000)
committer: Sebastian Dröge <sebastian.droege@collabora.co.uk>
Wed, 13 May 2009 08:34:02 +0000 (10:34 +0200)
Original commit message from CVS:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc:
* gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc:
* gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc:
Unroll the loop to handle two bytes at once. This should give
a small speedup and makes it possible to handle chroma and luma
differently, which is needed later.

gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc
gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc
gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc
gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc

index ce6d253..e156035 100644 (file)
     return 0;
 #else
 #ifdef SKIP_SEARCH
-            out = best;            // just use the results of our wierd bob
+            out[0] = best[0];            // just use the results of our wierd bob
+           out[1] = best[1];
 #else
-            diff = diff - MIN (diff, 10) - 4;
-           if (diff < 0)
-             out = weave;
+            diff[0] = diff[0] - MIN (diff[0], 10) - 4;
+           diff[1] = diff[1] - MIN (diff[1] - 10) - 4;
+           if (diff[0] < 0)
+             out[0] = weave[0];
            else
-             out = best;
+             out[0] = best[0];
+           
+           if (diff[1] < 0)
+             out[1] = weave[1];
+           else
+             out[1] = best[1];
+
 
-           out = CLAMP (out, MinVals, MaxVals);
+           out[0] = CLAMP (out[0], MinVals[0], MaxVals[0]);
+           out[1] = CLAMP (out[1], MinVals[1], MaxVals[1]);
 #endif
 
 #ifdef USE_VERTICAL_FILTER
-            pDest[x] = (out + pBob[0]) / 2;
-           pDest[x + dst_pitchw] = (pBob[src_pitch2] + out) / 2;
+            pDest[x] = (out[0] + pBob[0]) / 2;
+           pDest[x + dst_pitchw] = (pBob[src_pitch2] + out[0]) / 2;
+            pDest[x + 1] = (out[1] + pBob[1]) / 2;
+           pDest[x + 1 + dst_pitchw] = (pBob[src_pitch2 + 1] + out[1]) / 2;
 #else
-            pDest[x] = out;
+            pDest[x] = out[0];
+           pDest[x+1] = out[1];
 #endif
-            pBob += 1;
-            pBobP += 1;
-            pSrc += 1;
-            pSrcP += 1;
+            pBob += 2;
+            pBobP += 2;
+            pSrc += 2;
+            pSrcP += 2;
        }
         // adjust for next line
         pSrc  = src_pitch2 * (y+1) + pWeaveSrc;
index 9f42650..9d6a490 100644 (file)
@@ -6,29 +6,6 @@ const unsigned char* pSrc;
 const unsigned char* pBob;
 const unsigned char* pBobP;
 
-#ifndef IS_C
-
-int64_t Max_Mov   = 0x0404040404040404ull; 
-int64_t DiffThres = 0x0f0f0f0f0f0f0f0full; 
-int64_t YMask     = 0x00ff00ff00ff00ffull; // keeps only luma
-int64_t UVMask    = 0xff00ff00ff00ff00ull; // keeps only chroma
-int64_t TENS      = 0x0a0a0a0a0a0a0a0aull; 
-int64_t FOURS     = 0x0404040404040404ull; 
-int64_t ONES      = 0x0101010101010101ull; 
-int64_t Min_Vals  = 0x0000000000000000ull;
-int64_t Max_Vals  = 0x0000000000000000ull;
-int64_t ShiftMask = 0xfefffefffefffeffull;
-
-long oldbx;
-
-#else
-
-#ifdef USE_STRANGE_BOB
-int64_t DiffThres = 0x0f;
-#endif
-
-#endif
-
 // long is int32 on ARCH_386, int64 on ARCH_AMD64. Declaring it this way
 // saves a lot of xor's to delete 64bit garbage.
 
@@ -40,23 +17,10 @@ long            src_pitch2 = 2 * src_pitch;         // even & odd lines are interleaved in Avi
 
 
 long       dst_pitch2 = 2 * dst_pitch;
-#ifdef IS_C
+long        y;
 
-long     x,best,diff,avg,diff2,out;
-#endif
-long     y;
-
-#if defined(IS_SSE2)
-long     Last8 = (rowsize-16);                 // ofs to last 16 bytes in row for SSE2
-#elif defined(IS_C)
-long     Last8 = (rowsize-4);                   // ofs to last two pixel in row
-#else
-long     Last8 = (rowsize-8);                  // ofs to last 8 bytes in row
-#endif
+long     Last8;
 
-#ifndef IS_C
-long   dst_pitchw = dst_pitch; // local stor so asm can ref
-#endif
        pSrc  = pWeaveSrc;                      // points 1 weave line above
        pSrcP = pWeaveSrcP;                     // " 
 
@@ -112,9 +76,24 @@ long        dst_pitchw = dst_pitch; // local stor so asm can ref
 #define _YMask      "%17"
 #define _oldbx      "%18"
 #endif
+        Last8 = (rowsize-8);
 
        for (y=1; y < FldHeight-1; y++) 
-       {
+       {       
+          long dst_pitchw = dst_pitch; // local stor so asm can ref
+          int64_t Max_Mov   = 0x0404040404040404ull; 
+          int64_t DiffThres = 0x0f0f0f0f0f0f0f0full; 
+          int64_t YMask     = 0x00ff00ff00ff00ffull; // keeps only luma
+          int64_t UVMask    = 0xff00ff00ff00ff00ull; // keeps only chroma
+          int64_t TENS      = 0x0a0a0a0a0a0a0a0aull; 
+          int64_t FOURS     = 0x0404040404040404ull; 
+          int64_t ONES      = 0x0101010101010101ull; 
+          int64_t Min_Vals  = 0x0000000000000000ull;
+          int64_t Max_Vals  = 0x0000000000000000ull;
+          int64_t ShiftMask = 0xfefffefffefffeffull;
+
+          long oldbx;
+
                // pretend it's indented -->>
         __asm__ __volatile__
             (
@@ -206,9 +185,20 @@ long       dst_pitchw = dst_pitch; // local stor so asm can ref
              "pcmpeqb  %%mm7, %%mm7\n\t"                       // ffff, say we didn't find anything good yet
 
 #else
+        Last8 = (rowsize - 4);
 
        for (y=1; y < FldHeight-1; y++)
        {
+         #ifdef USE_STRANGE_BOB
+         long DiffThres = 0x0f;
+         #endif
+
+         #ifndef SKIP_SEARCH
+         long weave[2], MaxVals[2], MinVals[2];
+         #endif
+
+         long diff[2], best[2], avg[2], diff2[2], out[2], x;
+
 #ifdef USE_VERTICAL_FILTER
              pDest[0] = (3 * pBob[0] + pBob[src_pitch2]) / 4;
              pDest[1] = (3 * pBob[1] + pBob[src_pitch2 + 1]) / 4;
@@ -246,7 +236,7 @@ long        dst_pitchw = dst_pitch; // local stor so asm can ref
             pSrc += 4;
             pSrcP += 4;
 
-             for (x=4; x < Last8; x += 1) {
+             for (x=4; x < Last8; x += 2) {
 
 #ifdef USE_STRANGE_BOB
 #include "StrangeBob.inc"
@@ -258,7 +248,7 @@ long        dst_pitchw = dst_pitch; // local stor so asm can ref
              // from the current location, by rating them by the min distance
              // from the Bob value instead of the avg distance from that value.
              // our best and only rating so far
-             diff = 255;
+             diff[0] = diff[1] = 255;
 
 
 #endif
index 73ce706..45b4c86 100644 (file)
 
 #else
 
-        diff = -1;
-       best = 0;
+        diff[0] = -1;
+        diff[1] = -1;
+       best[0] = 0;
+       best[1] = 0;
        // j, n
         if (ABS (pBob[-2] - pBob[src_pitch2 - 4]) < DiffThres &&
            ABS (pBob[-4] - pBob[src_pitch2 + 4]) > DiffThres) {
-          best = (pBob[-2] + pBob[src_pitch2 - 4]) / 2;
-          diff = ABS (pBob[-2] - pBob[src_pitch2 - 4]);
+          best[0] = (pBob[-2] + pBob[src_pitch2 - 4]) / 2;
+          diff[0] = ABS (pBob[-2] - pBob[src_pitch2 - 4]);
+       }
+        if (ABS (pBob[-1] - pBob[src_pitch2 - 3]) < DiffThres &&
+           ABS (pBob[-3] - pBob[src_pitch2 + 5]) > DiffThres) {
+          best[1] = (pBob[-1] + pBob[src_pitch2 - 3]) / 2;
+          diff[1] = ABS (pBob[-1] - pBob[src_pitch2 - 3]);
        }
 
         // k & m
         if (ABS (pBob[2] - pBob[src_pitch2 + 4]) < DiffThres &&
            ABS (pBob[4] - pBob[src_pitch2 - 4]) > DiffThres) {
-          best = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
-          diff = ABS (pBob[4] - pBob[src_pitch2 - 4]);
+          best[0] = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
+          diff[0] = ABS (pBob[4] - pBob[src_pitch2 - 4]);
+       }
+
+        if (ABS (pBob[3] - pBob[src_pitch2 + 5]) < DiffThres &&
+           ABS (pBob[5] - pBob[src_pitch2 - 3]) > DiffThres) {
+          best[1] = (pBob[5] + pBob[src_pitch2 - 3]) / 2;
+          diff[1] = ABS (pBob[5] - pBob[src_pitch2 - 3]);
        }
 
         // c & d
        if (ABS (pBob[0] - pBob[src_pitch2 + 2]) < DiffThres &&
            ABS (pBob[2] - pBob[src_pitch2 - 2]) > DiffThres) {
-          best = (pBob[2] + pBob[src_pitch2 - 2]) / 2;
-          diff = ABS (pBob[2] - pBob[src_pitch2 - 2]);
+          best[0] = (pBob[2] + pBob[src_pitch2 - 2]) / 2;
+          diff[0] = ABS (pBob[2] - pBob[src_pitch2 - 2]);
+       }
+
+       if (ABS (pBob[1] - pBob[src_pitch2 + 3]) < DiffThres &&
+           ABS (pBob[3] - pBob[src_pitch2 - 1]) > DiffThres) {
+          best[1] = (pBob[3] + pBob[src_pitch2 - 1]) / 2;
+          diff[1] = ABS (pBob[3] - pBob[src_pitch2 - 1]);
        }
 
         // a & f
        if (ABS (pBob[0] - pBob[src_pitch2 - 2]) < DiffThres &&
            ABS (pBob[-2] - pBob[src_pitch2 + 2]) > DiffThres) {
-          best = (pBob[-2] + pBob[src_pitch2 + 2]) / 2;
-          diff = ABS (pBob[-2] - pBob[src_pitch2 + 2]);
+          best[0] = (pBob[-2] + pBob[src_pitch2 + 2]) / 2;
+          diff[0] = ABS (pBob[-2] - pBob[src_pitch2 + 2]);
+       }
+
+       if (ABS (pBob[1] - pBob[src_pitch2 - 1]) < DiffThres &&
+           ABS (pBob[-1] - pBob[src_pitch2 + 3]) > DiffThres) {
+          best[1] = (pBob[-1] + pBob[src_pitch2 + 3]) / 2;
+          diff[1] = ABS (pBob[-1] - pBob[src_pitch2 + 3]);
        }
 
        // b,e
        if (ABS (pBob[0] - pBob[src_pitch2]) < DiffThres) {
-          best = (pBob[0] + pBob[src_pitch2]) / 2;
-          diff = ABS (pBob[0] - pBob[src_pitch2]);
+          best[0] = (pBob[0] + pBob[src_pitch2]) / 2;
+          diff[0] = ABS (pBob[0] - pBob[src_pitch2]);
        }
 
+       if (ABS (pBob[1] - pBob[src_pitch2 + 1]) < DiffThres) {
+          best[1] = (pBob[1] + pBob[src_pitch2 + 1]) / 2;
+          diff[1] = ABS (pBob[1] - pBob[src_pitch2 + 1]);
+       }
+
+
 // We will also calc here the max/min values to later limit comb
 // so the max excursion will not exceed the Max_Comb constant
 
 #ifdef SKIP_SEARCH
-               best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+               best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+               best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
 #else
-               mov = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
-
-               MinVals = 0;
-               MaxVals = 255;
-               if (mov > DiffThres) {
-                 MinVals = MAX (MIN (pBob[0], pBob[src_pitch2]), best);
-                 MaxVals = MIN (MAX (pBob[0], pBob[src_pitch2]), best);
+               mov[0] = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
+               mov[1] = MAX (ABS (pBob[1] - pBobP[1]), ABS (pBob[src_pitch2 + 1] - pBobP[src_pitch2 + 1]));
+
+               MinVals[0] = 0;
+               MinVals[1] = 0;
+               MaxVals[0] = 255;
+               MaxVals[1] = 255;
+               if (mov[0] > DiffThres) {
+                 MinVals[0] = MAX (MIN (pBob[0], pBob[src_pitch2]), best[0]);
+                 MaxVals[0] = MIN (MAX (pBob[0], pBob[src_pitch2]), best[0]);
+               }
+               
+               if (mov[1] > DiffThres) {
+                 MinVals[1] = MAX (MIN (pBob[1], pBob[src_pitch2+1]), best[1]);
+                 MaxVals[1] = MIN (MAX (pBob[1], pBob[src_pitch2+1]), best[1]);
                }
 
-               best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+               best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+               best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
 #endif
+               avg[0] = (pBob[src_pitch2] + pBob[0]) / 2;
+               avg[1] = (pBob[src_pitch2 + 1] + pBob[1]) / 2;
+               diff2[0] = ABS (pBob[src_pitch2 + 1] - pBob[1]);
+               diff2[1] = ABS (pBob[src_pitch2 + 1] - pBob[1]);
+
+               if (diff[0] == -1 || diff2[0] < diff[0]) {
+                 best[0] = avg[0];
+                 diff[0] = diff2[0];
+               }
 
-               avg = (pBob[src_pitch2] + pBob[0]) / 2;
-               diff2 = ABS (pBob[src_pitch2] - pBob[0]);
-
-               if (diff == -1 || diff2 < diff) {
-                 best = avg;
-                 diff = diff2;
+               if (diff[1] == -1 || diff2[1] < diff[1]) {
+                 best[1] = avg[1];
+                 diff[1] = diff2[1];
                }
 #endif
index 6cbd1b8..f4bbb83 100644 (file)
 #else
 
         // a,f
-        best = (pBob[-2] + pBob[src_pitch2 + 2]) / 2;
-       diff = ABS (pBob[-2] - pBob[src_pitch2 + 2]);
+        best[0] = (pBob[-2] + pBob[src_pitch2 + 2]) / 2;
+       diff[0] = ABS (pBob[-2] - pBob[src_pitch2 + 2]);
+        best[1] = (pBob[-1] + pBob[src_pitch2 + 3]) / 2;
+       diff[1] = ABS (pBob[-1] - pBob[src_pitch2 + 3]);
 
         // c,d
-       if (ABS (pBob[2] - pBob[src_pitch2 - 2]) < diff) {
-          best = (pBob[2] + pBob[src_pitch2 - 2]) / 2;
-         diff = ABS (pBob[2] - pBob[src_pitch2 - 2]);
+       if (ABS (pBob[2] - pBob[src_pitch2 - 2]) < diff[0]) {
+          best[0] = (pBob[2] + pBob[src_pitch2 - 2]) / 2;
+         diff[0] = ABS (pBob[2] - pBob[src_pitch2 - 2]);
+       }
+
+       if (ABS (pBob[3] - pBob[src_pitch2 - 1]) < diff[1]) {
+          best[1] = (pBob[3] + pBob[src_pitch2 - 1]) / 2;
+         diff[1] = ABS (pBob[3] - pBob[src_pitch2 - 1]);
        }
 
        // j,n
-       if (ABS (pBob[-4] - pBob[src_pitch2 + 4]) < diff) {
-          best = (pBob[-4] + pBob[src_pitch2 + 4]) / 2;
-         diff = ABS (pBob[-4] - pBob[src_pitch2 + 4]);
+       if (ABS (pBob[-4] - pBob[src_pitch2 + 4]) < diff[0]) {
+          best[0] = (pBob[-4] + pBob[src_pitch2 + 4]) / 2;
+         diff[0] = ABS (pBob[-4] - pBob[src_pitch2 + 4]);
+       }
+
+       if (ABS (pBob[-3] - pBob[src_pitch2 + 5]) < diff[1]) {
+          best[1] = (pBob[-3] + pBob[src_pitch2 + 5]) / 2;
+         diff[1] = ABS (pBob[-3] - pBob[src_pitch2 + 5]);
        }
 
        // k,m
-       if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff) {
-          best = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
-         diff = ABS (pBob[-4] - pBob[src_pitch2 - 4]);
+       if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff[0]) {
+          best[0] = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
+         diff[0] = ABS (pBob[-4] - pBob[src_pitch2 - 4]);
        }
 
+       if (ABS (pBob[5] - pBob[src_pitch2 - 3]) < diff[1]) {
+          best[1] = (pBob[5] + pBob[src_pitch2 - 3]) / 2;
+         diff[1] = ABS (pBob[-3] - pBob[src_pitch2 - 3]);
+       }
        // k,m
-       if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff) {
-          best = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
-         diff = ABS (pBob[-4] - pBob[src_pitch2 - 4]);
+       if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff[0]) {
+          best[0] = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
+         diff[0] = ABS (pBob[-4] - pBob[src_pitch2 - 4]);
+       }
+       
+       if (ABS (pBob[5] - pBob[src_pitch2 - 3]) < diff[1]) {
+          best[1] = (pBob[5] + pBob[src_pitch2 - 3]) / 2;
+         diff[1] = ABS (pBob[-3] - pBob[src_pitch2 - 3]);
        }
 
 // We will also calc here the max/min values to later limit comb
 // so the max excursion will not exceed the Max_Comb constant
 
 #ifdef SKIP_SEARCH
-               best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+               best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+               best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
 #else
-               mov = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
+               mov[0] = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
+               mov[1] = MAX (ABS (pBob[1] - pBobP[1]), ABS (pBob[src_pitch2 + 1] - pBobP[src_pitch2 + 1]));
 
-               MinVals = 0;
-               MaxVals = 255;
-               if (mov > Max_Mov) {
-                 MinVals = MAX (MIN (pBob[0], pBob[src_pitch2]), best);
-                 MaxVals = MIN (MAX (pBob[0], pBob[src_pitch2]), best);
+               MinVals[0] = 0;
+               MinVals[1] = 0;
+               MaxVals[0] = 255;
+               MaxVals[1] = 255;
+
+               if (mov[0] > Max_Mov[0]) {
+                 MinVals[0] = MAX (MIN (pBob[0], pBob[src_pitch2]), best[0]);
+                 MaxVals[0] = MIN (MAX (pBob[0], pBob[src_pitch2]), best[0]);
+               }
+               
+               if (mov[1] > Max_Mov[1]) {
+                 MinVals[1] = MAX (MIN (pBob[1], pBob[src_pitch2 + 1]), best[1]);
+                 MaxVals[1] = MIN (MAX (pBob[1], pBob[src_pitch2 + 1]), best[1]);
                }
 
-               best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+               best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+               best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
 #endif
 
-               avg = (pBob[src_pitch2] + pBob[0]) / 2;
-               diff2 = ABS (pBob[src_pitch2] - pBob[0]);
+               avg[0] = (pBob[src_pitch2] + pBob[0]) / 2;
+               avg[1] = (pBob[src_pitch2 + 1] + pBob[1]) / 2;
+               diff2[0] = ABS (pBob[src_pitch2] - pBob[0]);
+               diff2[1] = ABS (pBob[src_pitch2 + 1] - pBob[1]);
+
+               if (diff2[0] < diff[0]) {
+                 best[0] = avg[0];
+                 diff[0] = diff2[0];
+               }
 
-               if (diff2 < diff) {
-                 best = avg;
-                 diff = diff2;
+               if (diff2[1] < diff[1]) {
+                 best[1] = avg[1];
+                 diff[1] = diff2[1];
                }
 #endif