[MOVED FROM BAD 40/56] gst/deinterlace2/: First part of the C implementation of the...
author Sebastian Dröge <slomo@circular-chaos.org>
Mon, 25 Aug 2008 14:37:45 +0000 (14:37 +0000)
committer Sebastian Dröge <sebastian.droege@collabora.co.uk>
Wed, 13 May 2009 08:34:02 +0000 (10:34 +0200)
Original commit message from CVS:
* gst/deinterlace2/gstdeinterlace2.c:
(gst_deinterlace_method_class_init):
* gst/deinterlace2/gstdeinterlace2.h:
* gst/deinterlace2/tvtime/tomsmocomp.c:
(gst_deinterlace_method_tomsmocomp_class_init):
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc:
* gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc:
* gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc:
* gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc:
* gst/deinterlace2/tvtime/tomsmocomp/tomsmocompmacros.h:
First part of the C implementation of the tomsmocomp deinterlacing
algorithm. This only supports search-effort=0 currently, is painfully
slow and needs some cleanup later when all search-effort settings
are implemented in C.

gst/deinterlace2/gstdeinterlace2.c
gst/deinterlace2/gstdeinterlace2.h
gst/deinterlace2/tvtime/tomsmocomp.c
gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc
gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc
gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc
gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc
gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc
gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc
gst/deinterlace2/tvtime/tomsmocomp/tomsmocompmacros.h

index fd9ed99..0ef5381 100644 (file)
@@ -55,7 +55,7 @@ G_DEFINE_TYPE (GstDeinterlaceMethod, gst_deinterlace_method, GST_TYPE_OBJECT);
 static void
 gst_deinterlace_method_class_init (GstDeinterlaceMethodClass * klass)
 {
-  klass->available = TRUE;
+
 }
 
 static void
index 01f1831..4080618 100644 (file)
@@ -74,8 +74,6 @@ struct _GstDeinterlaceMethodClass {
   guint fields_required;
   guint latency;
 
-  gboolean available;
-
   void (*deinterlace_frame) (GstDeinterlaceMethod *self, GstDeinterlace2 * parent);
 
   const gchar *name;
index c27d487..0565b6c 100644 (file)
@@ -29,9 +29,6 @@
 #include "gstdeinterlace2.h"
 #include "plugins.h"
 
-#include "tomsmocomp/tomsmocompmacros.h"
-#include "x86-64_macros.inc"
-
 #define GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP (gst_deinterlace_method_tomsmocomp_get_type ())
 #define GST_IS_DEINTERLACE_METHOD_TOMSMOCOMP(obj)              (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP))
 #define GST_IS_DEINTERLACE_METHOD_TOMSMOCOMP_CLASS(klass)      (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP))
@@ -72,6 +69,20 @@ Fieldcopy (void *dest, const void *src, size_t count,
   return 0;
 }
 
+#define USE_FOR_DSCALER
+
+#define IS_C
+#define SIMD_TYPE C
+#define FUNCT_NAME tomsmocompDScaler_C
+#include "tomsmocomp/TomsMoCompAll.inc"
+#undef  IS_C
+#undef  SIMD_TYPE
+#undef  FUNCT_NAME
+
+#ifdef BUILD_X86_ASM
+
+#include "tomsmocomp/tomsmocompmacros.h"
+#include "x86-64_macros.inc"
 
 #define IS_MMX
 #define SIMD_TYPE MMX
@@ -97,6 +108,8 @@ Fieldcopy (void *dest, const void *src, size_t count,
 #undef  SIMD_TYPE
 #undef  FUNCT_NAME
 
+#endif
+
 G_DEFINE_TYPE (GstDeinterlaceMethodTomsMoComp,
     gst_deinterlace_method_tomsmocomp, GST_TYPE_DEINTERLACE_METHOD);
 
@@ -173,6 +186,7 @@ static void
   dim_class->nick = "tomsmocomp";
   dim_class->latency = 1;
 
+#ifdef BUILD_X86_ASM
   if (cpu_flags & OIL_IMPL_FLAG_MMXEXT) {
     dim_class->deinterlace_frame = tomsmocompDScaler_MMXEXT;
   } else if (cpu_flags & OIL_IMPL_FLAG_3DNOW) {
@@ -180,8 +194,11 @@ static void
   } else if (cpu_flags & OIL_IMPL_FLAG_MMX) {
     dim_class->deinterlace_frame = tomsmocompDScaler_MMX;
   } else {
-    dim_class->available = FALSE;
+    dim_class->deinterlace_frame = tomsmocompDScaler_C;
   }
+#else
+  dim_class->deinterlace_frame = tomsmocompDScaler_C;
+#endif
 }
 
 static void
index 6b6ee4c..ce6d253 100644 (file)
@@ -1,11 +1,9 @@
 // -*- c++ -*-       
 
-#ifdef IS_SSE2
-//sse2 code deleted for now
-#else
-
 // Version for non-SSE2
 
+#ifndef IS_C
+
 #ifdef SKIP_SEARCH
             "movq    %%mm6, %%mm0\n\t"            // just use the results of our wierd bob
 #else
@@ -59,6 +57,7 @@
             V_MOVNTQ ("(%"XAX", %%"XDX")", "%%mm0")
             //      pavgb   mm1, qword ptr["XBX"+"XCX"]
             V_PAVGB ("%%mm1", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask)
+           //FIXME: XDX or XAX!!
             "addq   "_dst_pitchw", %%"XBX
             //      movntq  qword ptr["XAX"+"XDX"], mm1
             V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm1")
@@ -71,7 +70,6 @@
            LEAX"    8(%%"XDX"), %%"XDX"\n\t"       // bump offset pointer
            CMPX"    "_Last8", %%"XDX"\n\t"       // done with line?
            "jb      1b\n\t"                    // y
-#endif
 
            MOVX" "_oldbx", %%"XBX"\n\t"
 
     }
     
     return 0;
+#else
+#ifdef SKIP_SEARCH
+            out = best;            // just use the results of our wierd bob
+#else
+            diff = diff - MIN (diff, 10) - 4;
+           if (diff < 0)
+             out = weave;
+           else
+             out = best;
+
+           out = CLAMP (out, MinVals, MaxVals);
+#endif
+
+#ifdef USE_VERTICAL_FILTER
+            pDest[x] = (out + pBob[0]) / 2;
+           pDest[x + dst_pitchw] = (pBob[src_pitch2] + out) / 2;
+#else
+            pDest[x] = out;
+#endif
+            pBob += 1;
+            pBobP += 1;
+            pSrc += 1;
+            pSrcP += 1;
+       }
+        // adjust for next line
+        pSrc  = src_pitch2 * (y+1) + pWeaveSrc;
+        pSrcP = src_pitch2 * (y+1) + pWeaveSrcP;
+        pDest = dst_pitch2 * (y+1) + pWeaveDest + dst_pitch2;
+
+
+       if (TopFirst)
+       {
+               pBob = pCopySrc + src_pitch2;
+               pBobP = pCopySrcP + src_pitch2;
+       }
+       else
+       {
+               pBob =  pCopySrc;
+               pBobP =  pCopySrcP;
+       }
+
+        pBob  += src_pitch2 * (y+1);
+        pBobP += src_pitch2 * (y+1);
+    }
+    
+    return 0;
+
+#endif
index 64e562b..9f42650 100644 (file)
@@ -6,6 +6,8 @@ const unsigned char* pSrc;
 const unsigned char* pBob;
 const unsigned char* pBobP;
 
+#ifndef IS_C
+
 int64_t Max_Mov   = 0x0404040404040404ull; 
 int64_t DiffThres = 0x0f0f0f0f0f0f0f0full; 
 int64_t YMask     = 0x00ff00ff00ff00ffull; // keeps only luma
@@ -19,6 +21,14 @@ int64_t ShiftMask = 0xfefffefffefffeffull;
 
 long oldbx;
 
+#else
+
+#ifdef USE_STRANGE_BOB
+int64_t DiffThres = 0x0f;
+#endif
+
+#endif
+
 // long is int32 on ARCH_368, int64 on ARCH_AMD64. Declaring it this way
 // saves a lot of xor's to delete 64bit garbage.
 
@@ -30,15 +40,23 @@ long            src_pitch2 = 2 * src_pitch;         // even & odd lines are interleaved in Avi
 
 
 long       dst_pitch2 = 2 * dst_pitch;
+#ifdef IS_C
+
+long     x,best,diff,avg,diff2,out;
+#endif
 long     y;
 
-#ifdef IS_SSE2
+#if defined(IS_SSE2)
 long     Last8 = (rowsize-16);                 // ofs to last 16 bytes in row for SSE2
+#elif defined(IS_C)
+long     Last8 = (rowsize-4);                   // ofs to last two pixels in row
 #else
 long     Last8 = (rowsize-8);                  // ofs to last 8 bytes in row
 #endif
 
-long           dst_pitchw = dst_pitch; // local stor so asm can ref
+#ifndef IS_C
+long   dst_pitchw = dst_pitch; // local stor so asm can ref
+#endif
        pSrc  = pWeaveSrc;                      // points 1 weave line above
        pSrcP = pWeaveSrcP;                     // " 
 
@@ -71,6 +89,8 @@ long          dst_pitchw = dst_pitch; // local stor so asm can ref
                pBobP =  pCopySrcP;
        }
 
+#ifndef IS_C
+
 #ifndef _pBob
 #define _pBob       "%0"
 #define _src_pitch2 "%1"
@@ -110,11 +130,6 @@ long               dst_pitchw = dst_pitch; // local stor so asm can ref
              // Save "XBX" (-fPIC)
             MOVX" %%"XBX", "_oldbx"\n\t"
              
-#ifdef IS_SSE2
-             
-             // sse2 code deleted for now
-
-#else
              // simple bob first 8 bytes
              MOVX"     "_pBob",        %%"XBX"\n\t"
              MOVX"     "_src_pitch2",  %%"XCX"\n\t"
@@ -190,4 +205,60 @@ long               dst_pitchw = dst_pitch; // local stor so asm can ref
              // our best and only rating so far
              "pcmpeqb  %%mm7, %%mm7\n\t"                       // ffff, say we didn't find anything good yet
 
+#else
+
+       for (y=1; y < FldHeight-1; y++)
+       {
+#ifdef USE_VERTICAL_FILTER
+             pDest[0] = (3 * pBob[0] + pBob[src_pitch2]) / 4;
+             pDest[1] = (3 * pBob[1] + pBob[src_pitch2 + 1]) / 4;
+             pDest[2] = (3 * pBob[2] + pBob[src_pitch2 + 2]) / 4;
+             pDest[3] = (3 * pBob[3] + pBob[src_pitch2 + 3]) / 4;
+            pDest[dst_pitchw] = (pBob[0] + 3 * pBob[src_pitch2]) / 4;
+            pDest[dst_pitchw + 1] = (pBob[1] + 3 * pBob[src_pitch2 + 1]) / 4;
+            pDest[dst_pitchw + 2] = (pBob[2] + 3 * pBob[src_pitch2 + 2]) / 4;
+            pDest[dst_pitchw + 3] = (pBob[3] + 3 * pBob[src_pitch2 + 3]) / 4;
+
+             // simple bob last 4 bytes
+            pDest[Last8] = (3 * pBob[Last8] + pBob[Last8 + src_pitch2]) / 4;
+            pDest[Last8 + 1] = (3 * pBob[Last8 + 1] + pBob[Last8 + src_pitch2 + 1]) / 4;
+            pDest[Last8 + 2] = (3 * pBob[Last8 + 2] + pBob[Last8 + src_pitch2 + 2]) / 4;
+            pDest[Last8 + 3] = (3 * pBob[Last8 + 3] + pBob[Last8 + src_pitch2 + 3]) / 4;
+            pDest[Last8 + src_pitch2] = (pBob[Last8] + 3 * pBob[Last8 + src_pitch2]) / 4;
+            pDest[Last8 + src_pitch2 + 1] = (pBob[Last8 + 1] + 3 * pBob[Last8 + src_pitch2 + 1]) / 4;
+            pDest[Last8 + src_pitch2 + 2] = (pBob[Last8 + 2] + 3 * pBob[Last8 + src_pitch2 + 2]) / 4;
+            pDest[Last8 + src_pitch2 + 3] = (pBob[Last8 + 3] + 3 * pBob[Last8 + src_pitch2 + 3]) / 4;
+#else
+             pDest[0] = (pBob[0] + pBob[src_pitch2 + 1]) / 2;
+             pDest[1] = (pBob[1] + pBob[src_pitch2 + 1]) / 2;
+             pDest[2] = (pBob[2] + pBob[src_pitch2 + 2]) / 2;
+             pDest[3] = (pBob[3] + pBob[src_pitch2 + 3]) / 2;
+
+             // simple bob last 4 bytes
+            pDest[Last8] = (pBob[Last8] + pBob[Last8 + src_pitch2]) / 2;
+            pDest[Last8 + 1] = (pBob[Last8 + 1] + pBob[Last8 + src_pitch2 + 1]) / 2;
+            pDest[Last8 + 2] = (pBob[Last8 + 2] + pBob[Last8 + src_pitch2 + 2]) / 2;
+            pDest[Last8 + 3] = (pBob[Last8 + 3] + pBob[Last8 + src_pitch2 + 3]) / 2;
+#endif
+
+             pBob += 4;
+            pBobP += 4;
+            pSrc += 4;
+            pSrcP += 4;
+
+             for (x=4; x < Last8; x += 1) {
+
+#ifdef USE_STRANGE_BOB
+#include "StrangeBob.inc"
+#else
+#include "WierdBob.inc"
+#endif
+
+             // We will keep a slight bias to using the weave pixels
+             // from the current location, by rating them by the min distance
+             // from the Bob value instead of the avg distance from that value.
+             // our best and only rating so far
+             diff = 255;
+
+
 #endif
index c1d2b5b..73ce706 100644 (file)
@@ -4,7 +4,7 @@
                // Assume our pixels are layed out as follows with x the calc'd bob value
                // and the other pixels are from the current field
                //  
-               //                j a b c k             current field
+               //        j a b c k             current field
                //            x                 calculated line
                //        m d e f n             current field
                //
@@ -26,6 +26,8 @@
         // end if
         // pickup any thing not yet set with avg(b,e)
 
+#ifndef IS_C
+
                // j, n
         "pxor %%mm5, %%mm5\n\t"
         "pxor %%mm6, %%mm6\n\t"
@@ -48,7 +50,7 @@
                "movq    -4(%%"XBX"), %%mm0\n\t"                // value j
                "movq    4(%%"XBX", %%"XCX"), %%mm1\n\t"        // value n
                "movq   %%mm0, %%mm2\n\t"                                       
-               "pavgb  %%mm1, %%mm2\n\t"                                       // avg(j,n)
+               V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(j,n)
         "movq  %%mm0, %%mm3\n\t"
                "psubusb        %%mm1, %%mm0\n\t"
                "psubusb %%mm3, %%mm1\n\t"
@@ -60,7 +62,6 @@
                "pcmpeqb %%mm3, %%mm1\n\t"                      // now ff where abs(j,n) < Thres, else 00       
 
         "pand    %%mm4, %%mm1\n\t"
-        
         "pand    %%mm1, %%mm2\n\t"
         "pand    %%mm1, %%mm0\n\t"
 
                "por            %%mm2, %%mm6\n\t"                       // our x2 value
                "por            %%mm1, %%mm7\n\t"                       // our x2 diffs
                "movq   %%mm7, %%mm4\n\t"                       // save as bob uncertainty indicator
+
+#else
+
+        diff = -1;
+       best = 0;
+       // j, n
+        if (ABS (pBob[-2] - pBob[src_pitch2 - 4]) < DiffThres &&
+           ABS (pBob[-4] - pBob[src_pitch2 + 4]) > DiffThres) {
+          best = (pBob[-2] + pBob[src_pitch2 - 4]) / 2;
+          diff = ABS (pBob[-2] - pBob[src_pitch2 - 4]);
+       }
+
+        // k & m
+        if (ABS (pBob[2] - pBob[src_pitch2 + 4]) < DiffThres &&
+           ABS (pBob[4] - pBob[src_pitch2 - 4]) > DiffThres) {
+          best = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
+          diff = ABS (pBob[4] - pBob[src_pitch2 - 4]);
+       }
+
+        // c & d
+       if (ABS (pBob[0] - pBob[src_pitch2 + 2]) < DiffThres &&
+           ABS (pBob[2] - pBob[src_pitch2 - 2]) > DiffThres) {
+          best = (pBob[2] + pBob[src_pitch2 - 2]) / 2;
+          diff = ABS (pBob[2] - pBob[src_pitch2 - 2]);
+       }
+
+        // a & f
+       if (ABS (pBob[0] - pBob[src_pitch2 - 2]) < DiffThres &&
+           ABS (pBob[-2] - pBob[src_pitch2 + 2]) > DiffThres) {
+          best = (pBob[-2] + pBob[src_pitch2 + 2]) / 2;
+          diff = ABS (pBob[-2] - pBob[src_pitch2 + 2]);
+       }
+
+       // b,e
+       if (ABS (pBob[0] - pBob[src_pitch2]) < DiffThres) {
+          best = (pBob[0] + pBob[src_pitch2]) / 2;
+          diff = ABS (pBob[0] - pBob[src_pitch2]);
+       }
+
+// We will also calc here the max/min values to later limit comb
+// so the max excursion will not exceed the Max_Comb constant
+
+#ifdef SKIP_SEARCH
+               best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+#else
+               mov = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
+
+               MinVals = 0;
+               MaxVals = 255;
+               if (mov > DiffThres) {
+                 MinVals = MAX (MIN (pBob[0], pBob[src_pitch2]), best);
+                 MaxVals = MIN (MAX (pBob[0], pBob[src_pitch2]), best);
+               }
+
+               best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+#endif
+
+               avg = (pBob[src_pitch2] + pBob[0]) / 2;
+               diff2 = ABS (pBob[src_pitch2] - pBob[0]);
+
+               if (diff == -1 || diff2 < diff) {
+                 best = avg;
+                 diff = diff2;
+               }
+#endif
index daa3809..835098a 100644 (file)
 #define SEFUNC(x) Search_Effort_MMXEXT_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight)
 #elif defined(IS_3DNOW)
 #define SEFUNC(x) Search_Effort_3DNOW_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight)
-#else
+#elif defined(IS_MMX)
 #define SEFUNC(x) Search_Effort_MMX_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight)
+#else
+#define SEFUNC(x) Search_Effort_C_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight)
 #endif
 
 #include "TomsMoCompAll2.inc"
 #define SEFUNC(x) Search_Effort_MMXEXT_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight)
 #elif defined(IS_3DNOW)
 #define SEFUNC(x) Search_Effort_3DNOW_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight)
-#else
+#elif defined(IS_MMX)
 #define SEFUNC(x) Search_Effort_MMX_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight)
+#else
+#define SEFUNC(x) Search_Effort_C_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight)
 #endif
 
 void FUNCT_NAME(GstDeinterlaceMethod *d_method, GstDeinterlace2* object)
@@ -231,7 +235,7 @@ void FUNCT_NAME(GstDeinterlaceMethod *d_method, GstDeinterlace2* object)
        }
     }
 
-#ifdef HAVE_CPU_I386
+#if defined(BUILD_X86_ASM) && !defined(IS_C)
   __asm__ __volatile__("emms");
 #endif
 }
index 6d3447e..6a68f08 100644 (file)
@@ -21,25 +21,45 @@ static inline int SEARCH_EFFORT_FUNC(0)             // we don't try at all ;-)
 
 static inline int SEARCH_EFFORT_FUNC(1)
 {
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
                //see Search_Effort_Max() for comments
 #include "SearchLoopTop.inc"
        RESET_CHROMA            // pretend chroma diffs was 255 each
 #include "SearchLoop0A.inc"
 #include "SearchLoopBottom.inc"
+#endif
 }
 
 static inline int SEARCH_EFFORT_FUNC(3)
 {
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
                //see Search_Effort_Max() for comments
 #include "SearchLoopTop.inc"
 #include "SearchLoopOddA2.inc"
        RESET_CHROMA            // pretend chroma diffs was 255 each
 #include "SearchLoop0A.inc"
 #include "SearchLoopBottom.inc"
+#endif
 }
 
 static inline int SEARCH_EFFORT_FUNC(5)
 {
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
                //see Search_Effort_Max() for comments
 #include "SearchLoopTop.inc"
 #include "SearchLoopOddA2.inc"
@@ -47,11 +67,18 @@ static inline int SEARCH_EFFORT_FUNC(5)
        RESET_CHROMA            // pretend chroma diffs was 255 each
 #include "SearchLoop0A.inc"
 #include "SearchLoopBottom.inc"
+#endif
 }
 
 // 3x3 search
 static inline int SEARCH_EFFORT_FUNC(9)
 {
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
                //see SearchEffortMax() for comments
 #include "SearchLoopTop.inc"
 #include "SearchLoopOddA.inc"
@@ -59,11 +86,18 @@ static inline int SEARCH_EFFORT_FUNC(9)
 #include "SearchLoopVA.inc"
 #include "SearchLoop0A.inc"
 #include "SearchLoopBottom.inc"
+#endif
 }
 
 // Search 9 with 2 H-half pels added
 static inline int SEARCH_EFFORT_FUNC(11)
 {
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
                //see SearchEffortMax() for comments
 #include "SearchLoopTop.inc"
 #include "SearchLoopOddA.inc"
@@ -72,11 +106,18 @@ static inline int SEARCH_EFFORT_FUNC(11)
 #include "SearchLoopVA.inc"
 #include "SearchLoop0A.inc"
 #include "SearchLoopBottom.inc"
+#endif
 }
 
 // Search 11 with 2 V-half pels added
 static inline int SEARCH_EFFORT_FUNC(13)
 {
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
                //see SearchEffortMax() for comments
 #include "SearchLoopTop.inc"
 #include "SearchLoopOddA.inc"
@@ -86,11 +127,18 @@ static inline int SEARCH_EFFORT_FUNC(13)
 #include "SearchLoopVA.inc"
 #include "SearchLoop0A.inc"
 #include "SearchLoopBottom.inc"
+#endif
 }
 
 // 5x3
 static inline int SEARCH_EFFORT_FUNC(15)
 {
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
                //see SearchEffortMax() for comments
 #include "SearchLoopTop.inc"
 #include "SearchLoopOddA.inc"
@@ -99,11 +147,18 @@ static inline int SEARCH_EFFORT_FUNC(15)
 #include "SearchLoopVA.inc"
 #include "SearchLoop0A.inc"
 #include "SearchLoopBottom.inc"
+#endif
 }
 
 // 5x3 + 4 half pels
 static inline int SEARCH_EFFORT_FUNC(19)
 {
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
                //see SearchEffortMax() for comments
 #include "SearchLoopTop.inc"
 #include "SearchLoopOddA.inc"
@@ -114,6 +169,7 @@ static inline int SEARCH_EFFORT_FUNC(19)
 #include "SearchLoopVA.inc"
 #include "SearchLoop0A.inc"
 #include "SearchLoopBottom.inc"
+#endif
 }
 
 // Handle one 4x1 block of pixels
@@ -121,6 +177,12 @@ static inline int SEARCH_EFFORT_FUNC(19)
 
 static inline int SEARCH_EFFORT_FUNC(21)
 {
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
                //see SearchLoopTop.inc for comments
 #include "SearchLoopTop.inc"
 
@@ -140,12 +202,19 @@ static inline int SEARCH_EFFORT_FUNC(21)
                // blend our results and loop
 #include "SearchLoop0A.inc"
 #include "SearchLoopBottom.inc"
+#endif
 }
 
 // Handle one 4x1 block of pixels
 // Search a 9x3 area, no half pels
 static inline int SEARCH_EFFORT_FUNC(Max)
 {
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
                //see SearchLoopTop.inc for comments
 #include "SearchLoopTop.inc"
 
@@ -167,6 +236,7 @@ static inline int SEARCH_EFFORT_FUNC(Max)
                // blend our results and loop
 #include "SearchLoop0A.inc"
 #include "SearchLoopBottom.inc"
+#endif
 }
 
 #undef SEARCH_EFFORT_FUNC
index 36fd9d2..6cbd1b8 100644 (file)
@@ -13,6 +13,7 @@
                 
                // selected for the     smallest of abs(a,f), abs(c,d), or abs(b,e), etc.
 
+#ifndef IS_C
                // a,f
                "movq    -2(%%"XBX"), %%mm0\n\t"                // value a from top left                
                "movq    2(%%"XBX", %%"XCX"), %%mm1\n\t"        // value f from bottom right                    
                "pxor   %%mm4, %%mm4\n\t"                       
                "psubusb %%mm7, %%mm3\n\t"                      // nonzero where new weights bigger, else 0
                "pcmpeqb %%mm4, %%mm3\n\t"                      // now ff where new better, else 00     
+
                "pcmpeqb        %%mm3, %%mm4\n\t"                       // here ff where old better, else 00
 
                "pand   %%mm3, %%mm1\n\t"
                "por            %%mm1, %%mm7\n\t"                       // our x2 diffs
                "movq   %%mm7, %%mm4\n\t"                       // save as bob uncertainty indicator
 
+#else
+
+        // a,f
+        best = (pBob[-2] + pBob[src_pitch2 + 2]) / 2;
+       diff = ABS (pBob[-2] - pBob[src_pitch2 + 2]);
+
+        // c,d
+       if (ABS (pBob[2] - pBob[src_pitch2 - 2]) < diff) {
+          best = (pBob[2] + pBob[src_pitch2 - 2]) / 2;
+         diff = ABS (pBob[2] - pBob[src_pitch2 - 2]);
+       }
+
+       // j,n
+       if (ABS (pBob[-4] - pBob[src_pitch2 + 4]) < diff) {
+          best = (pBob[-4] + pBob[src_pitch2 + 4]) / 2;
+         diff = ABS (pBob[-4] - pBob[src_pitch2 + 4]);
+       }
+
+       // k,m
+       if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff) {
+          best = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
+         diff = ABS (pBob[-4] - pBob[src_pitch2 - 4]);
+       }
+
+       // k,m
+       if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff) {
+          best = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
+         diff = ABS (pBob[-4] - pBob[src_pitch2 - 4]);
+       }
+
+// We will also calc here the max/min values to later limit comb
+// so the max excursion will not exceed the Max_Comb constant
+
+#ifdef SKIP_SEARCH
+               best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+#else
+               mov = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
+
+               MinVals = 0;
+               MaxVals = 255;
+               if (mov > Max_Mov) {
+                 MinVals = MAX (MIN (pBob[0], pBob[src_pitch2]), best);
+                 MaxVals = MIN (MAX (pBob[0], pBob[src_pitch2]), best);
+               }
+
+               best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+#endif
+
+               avg = (pBob[src_pitch2] + pBob[0]) / 2;
+               diff2 = ABS (pBob[src_pitch2] - pBob[0]);
+
+               if (diff2 < diff) {
+                 best = avg;
+                 diff = diff2;
+               }
+#endif
index 156be89..7e8147e 100644 (file)
@@ -1,8 +1,6 @@
 #include <string.h>
 #include <math.h>
 
-#define USE_FOR_DSCALER
-
 // Define a few macros for CPU dependent instructions. 
 // I suspect I don't really understand how the C macro preprocessor works but
 // this seems to get the job done.          // TRB 7/01