gst/deinterlace2/: Add a deinterlacer plugin based on the tvtime/DScaler deinterlacer...
authorMartin Eikermann <meiker@upb.de>
Wed, 11 Jun 2008 11:12:49 +0000 (11:12 +0000)
committerSebastian Dröge <slomo@circular-chaos.org>
Wed, 11 Jun 2008 11:12:49 +0000 (11:12 +0000)
Original commit message from CVS:
Based on a patch by: Martin Eikermann <meiker at upb dot de>
* gst/deinterlace2/Makefile.am:
* gst/deinterlace2/gstdeinterlace2.c:
(gst_deinterlace2_method_get_type),
(gst_deinterlace2_fields_get_type),
(gst_deinterlace2_field_layout_get_type),
(gst_deinterlace2_base_init), (gst_deinterlace2_class_init),
(gst_deinterlace2_init), (gst_deinterlace2_set_method),
(gst_deinterlace2_set_property), (gst_deinterlace2_get_property),
(gst_deinterlace2_finalize), (gst_deinterlace2_pop_history),
(gst_deinterlace2_head_history), (gst_deinterlace2_push_history),
(gst_deinterlace2_deinterlace_scanlines), (gst_deinterlace2_chain),
(gst_deinterlace2_setcaps), (gst_deinterlace2_sink_event),
(gst_deinterlace2_change_state), (gst_deinterlace2_src_event),
(gst_deinterlace2_src_query), (gst_deinterlace2_src_query_types),
(plugin_init):
* gst/deinterlace2/gstdeinterlace2.h:
* gst/deinterlace2/tvtime/greedy.c: (copy_scanline),
(deinterlace_greedy_packed422_scanline_mmxext),
(dscaler_greedyl_get_method):
* gst/deinterlace2/tvtime/greedyh.asm:
* gst/deinterlace2/tvtime/greedyh.c:
(deinterlace_frame_di_greedyh), (dscaler_greedyh_get_method),
(greedyh_init), (greedyh_filter_mmx), (greedyh_filter_3dnow),
(greedyh_filter_sse):
* gst/deinterlace2/tvtime/greedyh.h:
* gst/deinterlace2/tvtime/greedyhmacros.h:
* gst/deinterlace2/tvtime/mmx.h:
* gst/deinterlace2/tvtime/plugins.h:
* gst/deinterlace2/tvtime/speedtools.h:
* gst/deinterlace2/tvtime/speedy.c: (multiply_alpha), (clip255),
(comb_factor_packed422_scanline_mmx),
(diff_factor_packed422_scanline_c),
(diff_factor_packed422_scanline_mmx),
(diff_packed422_block8x8_mmx), (diff_packed422_block8x8_c),
(packed444_to_packed422_scanline_c),
(packed422_to_packed444_scanline_c),
(packed422_to_packed444_rec601_scanline_c),
(vfilter_chroma_121_packed422_scanline_mmx),
(vfilter_chroma_121_packed422_scanline_c),
(vfilter_chroma_332_packed422_scanline_mmx),
(vfilter_chroma_332_packed422_scanline_c),
(kill_chroma_packed422_inplace_scanline_mmx),
(kill_chroma_packed422_inplace_scanline_c),
(invert_colour_packed422_inplace_scanline_mmx),
(invert_colour_packed422_inplace_scanline_c),
(mirror_packed422_inplace_scanline_c),
(interpolate_packed422_scanline_c),
(convert_uyvy_to_yuyv_scanline_mmx),
(convert_uyvy_to_yuyv_scanline_c),
(interpolate_packed422_scanline_mmx),
(interpolate_packed422_scanline_mmxext),
(blit_colour_packed422_scanline_c),
(blit_colour_packed422_scanline_mmx),
(blit_colour_packed422_scanline_mmxext),
(blit_colour_packed4444_scanline_c),
(blit_colour_packed4444_scanline_mmx),
(blit_colour_packed4444_scanline_mmxext), (small_memcpy),
(speedy_memcpy_c), (speedy_memcpy_mmx), (speedy_memcpy_mmxext),
(blit_packed422_scanline_c), (blit_packed422_scanline_mmx),
(blit_packed422_scanline_mmxext),
(composite_colour4444_alpha_to_packed422_scanline_c),
(composite_colour4444_alpha_to_packed422_scanline_mmxext),
(composite_packed4444_alpha_to_packed422_scanline_c),
(composite_packed4444_alpha_to_packed422_scanline_mmxext),
(composite_packed4444_to_packed422_scanline_c),
(composite_packed4444_to_packed422_scanline_mmxext),
(composite_alphamask_to_packed4444_scanline_c),
(composite_alphamask_to_packed4444_scanline_mmxext),
(composite_alphamask_alpha_to_packed4444_scanline_c),
(premultiply_packed4444_scanline_c),
(premultiply_packed4444_scanline_mmxext),
(blend_packed422_scanline_c), (blend_packed422_scanline_mmxext),
(quarter_blit_vertical_packed422_scanline_mmxext),
(quarter_blit_vertical_packed422_scanline_c),
(subpix_blit_vertical_packed422_scanline_c),
(a8_subpix_blit_scanline_c), (myround), (init_RGB_to_YCbCr_tables),
(init_YCbCr_to_RGB_tables), (rgb24_to_packed444_rec601_scanline_c),
(rgba32_to_packed4444_rec601_scanline_c),
(packed444_to_rgb24_rec601_scanline_c),
(packed444_to_nonpremultiplied_packed4444_scanline_c),
(aspect_adjust_packed4444_scanline_c), (setup_speedy_calls),
(speedy_get_accel):
* gst/deinterlace2/tvtime/speedy.h:
* gst/deinterlace2/tvtime/sse.h:
* gst/deinterlace2/tvtime/tomsmocomp.c: (Fieldcopy),
(deinterlace_frame_di_tomsmocomp), (dscaler_tomsmocomp_get_method),
(tomsmocomp_init), (tomsmocomp_filter_mmx),
(tomsmocomp_filter_3dnow), (tomsmocomp_filter_sse):
* gst/deinterlace2/tvtime/tomsmocomp.h:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoop0A.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA8.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA6.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVA.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVAH.inc:
* gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc:
* gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc:
* gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc:
* gst/deinterlace2/tvtime/vfir.c: (deinterlace_line),
(deinterlace_scanline_vfir), (copy_scanline),
(dscaler_vfir_get_method):
* gst/deinterlace2/tvtime/x86-64_macros.inc:
Add a deinterlacer plugin based on the tvtime/DScaler deinterlacer,
which was relicensed to LGPL for GStreamer and in theory provides
better and faster results than the simple deinterlace element.
Fixes bug #163578.
Ported to GStreamer 0.10 but still not enabled or included in the
build system by default because of bad artefacts caused by a bug
somewhere, and because it can currently only be built on x86/amd64 and requires
special CFLAGS. Will be fixed soon.

16 files changed:
gst/deinterlace2/tvtime/tomsmocomp/SearchLoop0A.inc [new file with mode: 0644]
gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc [new file with mode: 0644]
gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA.inc [new file with mode: 0644]
gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA8.inc [new file with mode: 0644]
gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA.inc [new file with mode: 0644]
gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA2.inc [new file with mode: 0644]
gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA6.inc [new file with mode: 0644]
gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH.inc [new file with mode: 0644]
gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH2.inc [new file with mode: 0644]
gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc [new file with mode: 0644]
gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVA.inc [new file with mode: 0644]
gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVAH.inc [new file with mode: 0644]
gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc [new file with mode: 0644]
gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc [new file with mode: 0644]
gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc [new file with mode: 0644]
gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc [new file with mode: 0644]

diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoop0A.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoop0A.inc
new file mode 100644 (file)
index 0000000..b1d9aec
--- /dev/null
@@ -0,0 +1,15 @@
+// -*- c++ -*-
+
+// Searches just the center pixel, in both the old
+//  and new fields, but takes averages. This is an even
+// pixel address. Any chroma match will be used. (YUY2)
+// We best like finding 0 motion so we will bias everything we found previously
+// up by a little, and adjust later
+
+#ifdef IS_SSE2
+               "paddusb "_ONES", %%xmm7\n\t"                           // bias toward no motion
+#else
+               "paddusb "_ONES", %%mm7\n\t"                            // bias toward no motion
+#endif
+
+        MERGE4PIXavg("(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")")  // center, in old and new
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc
new file mode 100644 (file)
index 0000000..4b64b52
--- /dev/null
@@ -0,0 +1,116 @@
+// -*- c++ -*-       
+
+#ifdef IS_SSE2
+//sse2 code deleted for now
+#else
+
+// Version for non-SSE2
+
+#ifdef SKIP_SEARCH
+            "movq    %%mm6, %%mm0\n\t"            // just use the results of our wierd bob
+#else
+
+
+            // JA 9/Dec/2002
+            // failed experiment
+            // but leave in placeholder for me to play about
+#ifdef DONT_USE_STRANGE_BOB
+            // Use the best weave if diffs less than 10 as that
+            // means the image is still or moving cleanly
+            // if there is motion we will clip which will catch anything
+            "psubusb "_FOURS", %%mm7\n\t"          // sets bits to zero if weave diff < 4
+            "pxor    %%mm0, %%mm0\n\t"
+            "pcmpeqb %%mm0, %%mm7\n\t"            // all ff where weave better, else 00
+            "pcmpeqb %%mm7, %%mm0\n\t"            // all ff where bob better, else 00
+            "pand    %%mm6, %%mm0\n\t"            // use bob for these pixel values
+            "pand    %%mm5, %%mm7\n\t"            // use weave for these
+            "por     %%mm7, %%mm0\n\t"            // combine both
+#else
+            // Use the better of bob or weave
+            //      pminub  mm4, TENS           // the most we care about
+            V_PMINUB ("%%mm4", _TENS, "%%mm0")   // the most we care about
+            
+            "psubusb %%mm4, %%mm7\n\t"            // forgive that much from the weave estimate?
+            "psubusb "_FOURS", %%mm7\n\t"       // bias it a bit toward weave
+            "pxor    %%mm0, %%mm0\n\t"
+            "pcmpeqb %%mm0, %%mm7\n\t"            // all ff where weave better, else 00
+            "pcmpeqb %%mm7, %%mm0\n\t"            // all ff where bob better, else 00
+            "pand    %%mm6, %%mm0\n\t"            // use bob for these pixel values
+            "pand    %%mm5, %%mm7\n\t"            // use weave for these
+            "por     %%mm7, %%mm0\n\t"            // combine both
+#endif
+            
+            
+                //      pminub  mm0, Max_Vals       // but clip to catch the stray error
+                V_PMINUB ("%%mm0", _Max_Vals, "%%mm1") // but clip to catch the stray error
+                //      pmaxub  mm0, Min_Vals
+                V_PMAXUB ("%%mm0", _Min_Vals)
+                
+#endif
+
+
+            MOVX"     "_pDest", %%"XAX"\n\t"
+                
+#ifdef USE_VERTICAL_FILTER
+            "movq    %%mm0, %%mm1\n\t"
+            //      pavgb   mm0, qword ptr["XBX"]
+            V_PAVGB ("%%mm0", "(%%"XBX")", "%%mm2", _ShiftMask)
+            //      movntq  qword ptr["XAX"+"XDX"], mm0
+            V_MOVNTQ ("(%"XAX", %%"XDX")", "%%mm0")
+            //      pavgb   mm1, qword ptr["XBX"+"XCX"]
+            V_PAVGB ("%%mm1", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask)
+            "addq   "_dst_pitchw", %%"XBX
+            //      movntq  qword ptr["XAX"+"XDX"], mm1
+            V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm1")
+#else
+                
+            //      movntq  qword ptr["XAX"+"XDX"], mm0
+                V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm0")
+#endif
+                
+           LEAX"    8(%%"XDX"), %%"XDX"\n\t"       // bump offset pointer
+           CMPX"    "_Last8", %%"XDX"\n\t"       // done with line?
+           "jb      1b\n\t"                    // y
+#endif
+
+           MOVX" "_oldbx", %%"XBX"\n\t"
+
+        : /* no outputs */
+
+        : "m"(pBob),
+          "m"(src_pitch2),
+          "m"(ShiftMask),
+          "m"(pDest),
+          "m"(dst_pitchw),
+          "m"(Last8),
+          "m"(pSrc),
+          "m"(pSrcP),
+          "m"(pBobP),
+          "m"(DiffThres),
+          "m"(Min_Vals),
+          "m"(Max_Vals),
+          "m"(FOURS),
+          "m"(TENS),
+          "m"(ONES),
+          "m"(UVMask),
+          "m"(Max_Mov),
+          "m"(YMask),
+          "m"(oldbx)
+
+        : XAX, XCX, XDX, XSI, XDI,
+#ifdef ARCH_386
+          "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
+#endif
+          "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
+          "memory", "cc"
+        );
+
+        // adjust for next line
+        pSrc  += src_pitch2;
+        pSrcP += src_pitch2;
+        pDest += dst_pitch2;
+        pBob  += src_pitch2;
+        pBobP += src_pitch2;
+    }
+    
+    return 0;
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA.inc
new file mode 100644 (file)
index 0000000..6208fe8
--- /dev/null
@@ -0,0 +1,11 @@
+// -*- c++ -*-
+
+// Searches 2 pixels to the left and right, in both the old
+//  and new fields, but takes averages. These are even
+// pixel addresses. Chroma match will be used. (YUY2)
+        MERGE4PIXavg("-4(%%"XDI")", "4(%%"XSI", %%"XCX", 2)")  // up left, down right
+        MERGE4PIXavg("4(%%"XDI")", "-4(%%"XSI", %%"XCX", 2)")  // up right, down left
+        MERGE4PIXavg("-4(%%"XDI", %%"XCX")", "4(%%"XSI", %%"XCX")") // left, right
+        MERGE4PIXavg("4(%%"XDI", %%"XCX")", "-4(%%"XSI", %%"XCX")") // right, left
+        MERGE4PIXavg("-4(%%"XDI", %%"XCX", 2)", "4(%%"XSI")")   // down left, up right
+        MERGE4PIXavg("4(%%"XDI", %%"XCX", 2)", "-4(%%"XSI")")   // down right, up left
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA8.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA8.inc
new file mode 100644 (file)
index 0000000..2841c3f
--- /dev/null
@@ -0,0 +1,12 @@
+// -*- c++ -*-
+
+// Searches 4 pixels to the left and right, in both the old
+//  and new fields, but takes averages. These are even
+// pixel addresses. Chroma match will be used. (YUY2)
+        MERGE4PIXavg("-8(%%"XDI")", "8(%%"XSI", %%"XCX", 2)")  // up left, down right
+        MERGE4PIXavg("8(%%"XDI")", "-8(%%"XSI", %%"XCX", 2)")  // up right, down left
+        MERGE4PIXavg("-8(%%"XDI", %%"XCX")", "8(%%"XSI", %%"XCX")") // left, right
+        MERGE4PIXavg("8(%%"XDI", %%"XCX")", "-8(%%"XSI", %%"XCX")") // right, left
+        MERGE4PIXavg("-8(%%"XDI", %%"XCX", 2)", "8(%%"XSI")")   // down left, up right
+        MERGE4PIXavg("8(%%"XDI", %%"XCX", 2)", "-8(%%"XSI")")   // down right, up left
+
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA.inc
new file mode 100644 (file)
index 0000000..ab5375f
--- /dev/null
@@ -0,0 +1,10 @@
+// -*- c++ -*-
+
+// Searches 1 pixel to the left and right, in both the old
+//  and new fields, but takes averages. These are odd
+// pixel addresses. Any chroma match will not be used. (YUY2)
+        MERGE4PIXavg("-2(%%"XDI")", "2(%%"XSI", %%"XCX", 2)")  // up left, down right
+        MERGE4PIXavg("2(%%"XDI")", "-2(%%"XSI", %%"XCX", 2)")  // up right, down left
+        MERGE4PIXavg("-2(%%"XDI", %%"XCX", 2)", "2(%%"XSI")")   // down left, up right
+        MERGE4PIXavg("2(%%"XDI", %%"XCX", 2)", "-2(%%"XSI")")   // down right, up left   
+#include "SearchLoopOddA2.inc"
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA2.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA2.inc
new file mode 100644 (file)
index 0000000..fd3f6fb
--- /dev/null
@@ -0,0 +1,5 @@
+// Searches 1 pixel to the left and right, in both the old
+// and new fields, but takes averages. These are odd
+// pixel addresses. Any chroma match will not be used. (YUY2)
+        MERGE4PIXavg("-2(%%"XDI", %%"XCX")", "2(%%"XSI", %%"XCX")") // left, right
+        MERGE4PIXavg("2(%%"XDI", %%"XCX")", "-2(%%"XSI", %%"XCX")") // right, left
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA6.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA6.inc
new file mode 100644 (file)
index 0000000..cbae014
--- /dev/null
@@ -0,0 +1,11 @@
+// -*- c++ -*-
+
+// Searches 3 pixels to the left and right, in both the old
+//  and new fields, but takes averages. These are odd
+// pixel addresses. Any chroma match will not be used. (YUY2)
+        MERGE4PIXavg("-6(%%"XDI")", "6(%%"XSI", %%"XCX", 2)")  // up left, down right
+        MERGE4PIXavg("6(%%"XDI")", "-6(%%"XSI", %%"XCX", 2)")  // up right, down left
+        MERGE4PIXavg("-6(%%"XDI", %%"XCX")", "6(%%"XSI", %%"XCX")") // left, right
+        MERGE4PIXavg("6(%%"XDI", %%"XCX")", "-6(%%"XSI", %%"XCX")") // right, left
+        MERGE4PIXavg("-6(%%"XDI", %%"XCX", 2)", "6(%%"XSI")")   // down left, up right
+        MERGE4PIXavg("6(%%"XDI", %%"XCX", 2)", "-6(%%"XSI")")   // down right, up left
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH.inc
new file mode 100644 (file)
index 0000000..e59e3c7
--- /dev/null
@@ -0,0 +1,10 @@
+// Searches 1 pixel to the left and right, in both the old\r
+//  and new fields, but takes v-half pel averages. These are odd\r
+// pixel addresses. Any chroma match will not be used. (YUY2)\r
+               __asm\r
+               {\r
+        MERGE4PIXavgH("XDI"-2, "XDI"+"XCX"-2, "XSI"+"XCX"+2, "XSI"+2*"XCX"+2)  // up left, down right\r
+        MERGE4PIXavgH("XDI"+2, "XDI"+"XCX"+2, "XSI"+"XCX"-2, "XSI"+2*"XCX"-2)   // up right, down left\r
+        MERGE4PIXavgH("XDI"+2*"XCX"-2, "XDI"+"XCX"-2, "XSI"+"XCX"+2, "XSI"+2)   // down left, up right\r
+        MERGE4PIXavgH("XDI"+2*"XCX"+2, "XDI"+"XCX"+2, "XSI"+"XCX"-2, "XSI"-2)   // down right, up left   \r
+               }\r
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH2.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH2.inc
new file mode 100644 (file)
index 0000000..cd7d812
--- /dev/null
@@ -0,0 +1,5 @@
+// Searches 1 pixel to the left and right, in both the old
+//  and new fields, but takes vertical averages. These are odd
+// pixel addresses. Any chroma match will not be used. (YUY2)
+     MERGE4PIXavgH("-2(%%"XDI", %%"XCX")", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "2(%%"XSI", %%"XCX")") // left, right
+     MERGE4PIXavgH("2(%%"XDI", %%"XCX")", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "-2(%%"XSI", %%"XCX")") // right, left
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc
new file mode 100644 (file)
index 0000000..7560f40
--- /dev/null
@@ -0,0 +1,193 @@
+// -*- c++ -*-
+
+unsigned char* pDest;
+const unsigned char* pSrcP;
+const unsigned char* pSrc;
+const unsigned char* pBob;
+const unsigned char* pBobP;
+
+int64_t Max_Mov   = 0x0404040404040404ull; 
+int64_t DiffThres = 0x0f0f0f0f0f0f0f0full; 
+int64_t YMask     = 0x00ff00ff00ff00ffull; // keeps only luma
+int64_t UVMask    = 0xff00ff00ff00ff00ull; // keeps only chroma
+int64_t TENS      = 0x0a0a0a0a0a0a0a0aull; 
+int64_t FOURS     = 0x0404040404040404ull; 
+int64_t ONES      = 0x0101010101010101ull; 
+int64_t Min_Vals  = 0x0000000000000000ull;
+int64_t Max_Vals  = 0x0000000000000000ull;
+int64_t ShiftMask = 0xfefffefffefffeffull;
+
+// long is int32 on ARCH_386, int64 on ARCH_AMD64. Declaring it this way
+// saves a lot of xor's to delete 64bit garbage.
+
+#if defined(DBL_RESIZE) || defined(USE_FOR_DSCALER)
+long       src_pitch2 = src_pitch;                     // even & odd lines are not interleaved in DScaler
+#else
+long       src_pitch2 = 2 * src_pitch;         // even & odd lines are interleaved in Avisynth
+#endif
+
+
+long       dst_pitch2 = 2 * dst_pitch;
+long     y;
+
+#ifdef IS_SSE2
+long     Last8 = (rowsize-16);                 // ofs to last 16 bytes in row for SSE2
+#else
+long     Last8 = (rowsize-8);                  // ofs to last 8 bytes in row
+#endif
+
+long           dst_pitchw = dst_pitch; // local stor so asm can ref
+       pSrc  = pWeaveSrc;                      // points 1 weave line above
+       pSrcP = pWeaveSrcP;                     // " 
+
+#ifdef DBL_RESIZE
+               
+#ifdef USE_VERTICAL_FILTER
+       pDest = pWeaveDest + dst_pitch2;
+#else
+       pDest = pWeaveDest + 3*dst_pitch;
+#endif
+
+#else
+
+#ifdef USE_VERTICAL_FILTER
+       pDest = pWeaveDest + dst_pitch;
+#else
+       pDest = pWeaveDest + dst_pitch2;
+#endif
+
+#endif
+
+       if (TopFirst)
+       {
+               pBob = pCopySrc + src_pitch2;      // remember one weave line just copied previously
+               pBobP = pCopySrcP + src_pitch2;
+       }
+       else
+       {
+               pBob =  pCopySrc;
+               pBobP =  pCopySrcP;
+       }
+
+#ifndef _pBob
+#define _pBob       "%0"
+#define _src_pitch2 "%1"
+#define _ShiftMask  "%2"
+#define _pDest      "%3"
+#define _dst_pitchw "%4"
+#define _Last8      "%5"
+#define _pSrc       "%6"
+#define _pSrcP      "%7"
+#define _pBobP      "%8"
+#define _DiffThres  "%9"
+#define _Min_Vals   "%10"
+#define _Max_Vals   "%11"
+#define _FOURS      "%12"
+#define _TENS       "%13"
+#define _ONES       "%14"
+#define _UVMask     "%15"
+#define _Max_Mov    "%16"
+#define _YMask      "%17"
+#define _oldbx      "%18"
+#endif
+
+        long oldbx;
+
+       for (y=1; y < FldHeight-1; y++) 
+       {
+               // pretend it's indented -->>
+        __asm__ __volatile__
+            (
+             // Loop general reg usage
+             //
+             // XAX - pBobP, then pDest 
+             // XBX - pBob
+             // XCX - src_pitch2
+             // XDX - current offset
+             // XDI - prev weave pixels, 1 line up
+             // XSI - next weave pixels, 1 line up
+
+             // Save "XBX" (-fPIC)
+            MOVX" %%"XBX", "_oldbx"\n\t"
+             
+#ifdef IS_SSE2
+             
+             // sse2 code deleted for now
+
+#else
+             // simple bob first 8 bytes
+             MOVX"     "_pBob",        %%"XBX"\n\t"
+             MOVX"     "_src_pitch2",  %%"XCX"\n\t"
+
+#ifdef USE_VERTICAL_FILTER
+             "movq         (%%"XBX"),        %%mm0\n\t"
+             "movq         (%%"XBX", %%"XCX"), %%mm1\n\t" //, qword ptr["XBX"+"XCX"]
+             "movq         %%mm0,          %%mm2\n\t"
+             V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask)           // halfway between
+             V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask)           // 1/4 way
+             V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask)           // 3/4 way
+             MOVX"             "_pDest",       %%"XDI"\n\t"
+             MOVX"             "_dst_pitchw",  %%"XAX"\n\t"
+             V_MOVNTQ  ("(%%"XDI")", "%%mm0")
+             V_MOVNTQ  ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1
+
+             // simple bob last 8 bytes
+             MOVX"             "_Last8", %%"XDX"\n\t"
+             LEAX"             (%%"XBX", %%"XDX"), %%"XSI"\n\t"  // ["XBX"+"XDX"]
+             "movq         (%%"XSI"), %%mm0\n\t"
+             "movq         (%%"XSI", %%"XCX"), %%mm1\n\t"    // qword ptr["XSI"+"XCX"]
+             "movq         %%mm0, %%mm2\n\t"
+             V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask)           // halfway between
+             V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask)           // 1/4 way
+             V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask)           // 3/4 way
+             ADDX"             %%"XDX", %%"XDI"\n\t"                                           // last 8 bytes of dest
+             V_MOVNTQ  ("%%"XDI"", "%%mm0")
+             V_MOVNTQ  ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1)
+
+#else
+             "movq     (%%"XBX"), %%mm0\n\t"
+             //                pavgb   mm0, qword ptr["XBX"+"XCX"]
+             V_PAVGB ("%%mm0", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XBX"+"XCX"], mm2, ShiftMask)
+             MOVX"             "_pDest", %%"XDI"\n\t"
+             V_MOVNTQ  ("(%%"XDI")", "%%mm0")
+
+             // simple bob last 8 bytes
+             MOVX"             "_Last8", %%"XDX"\n\t"
+             LEAX"             (%%"XBX", %%"XDX"), %%"XSI"\n\t" //"XSI", ["XBX"+"XDX"]
+             "movq         (%%"XSI"), %%mm0\n\t"
+             //                pavgb   mm0, qword ptr["XSI"+"XCX"]
+             V_PAVGB   ("%%mm0", "(%%"XSI", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XSI"+"XCX"], mm2, ShiftMask)
+             V_MOVNTQ  ("(%%"XDI", %%"XDX")", "%%mm0") // qword ptr["XDI"+"XDX"], mm0)
+#endif
+             // now loop and get the middle qwords
+             MOVX"             "_pSrc", %%"XSI"\n\t"
+             MOVX"             "_pSrcP", %%"XDI"\n\t"
+             MOVX"             $8, %%"XDX"\n\t"                                // curr offset into all lines
+
+             "1:\n\t"  
+             MOVX"             "_pBobP", %%"XAX"\n\t"
+             ADDX"             $8, %%"XDI"\n\t"
+             ADDX"             $8, %%"XSI"\n\t"
+             ADDX"             $8, %%"XBX"\n\t"
+             ADDX"             %%"XDX", %%"XAX"\n\t"
+
+#ifdef USE_STRANGE_BOB
+#include "StrangeBob.inc"
+#else
+#include "WierdBob.inc"
+#endif
+
+             // For non-SSE2:
+             // through out most of the rest of this loop we will maintain
+             //        mm4             our min bob value
+             //        mm5             best weave pixels so far
+             // mm6            our max Bob value 
+             //        mm7             best weighted pixel ratings so far
+             
+             // We will keep a slight bias to using the weave pixels
+             // from the current location, by rating them by the min distance
+             // from the Bob value instead of the avg distance from that value.
+             // our best and only rating so far
+             "pcmpeqb  %%mm7, %%mm7\n\t"                       // ffff, say we didn't find anything good yet
+
+#endif
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVA.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVA.inc
new file mode 100644 (file)
index 0000000..3e3d19b
--- /dev/null
@@ -0,0 +1,6 @@
+// -*- c++ -*-
+
+// Searches the center vertical line above center and below, in both the old 
+// and new fields, but takes averages.  These are even pixel addresses.
+        MERGE4PIXavg("(%%"XDI", %%"XCX", 2)", "(%%"XSI")")     // down, up
+        MERGE4PIXavg("(%%"XDI")", "(%%"XSI", %%"XCX", 2)")     // up, down
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVAH.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVAH.inc
new file mode 100644 (file)
index 0000000..33155bc
--- /dev/null
@@ -0,0 +1,6 @@
+// -*- c++ -*-
+
+// Searches the center vertical line above center and below, in both the old 
+// and new fields, but takes averages.  These are even pixel addresses.
+        MERGE4PIXavgH("(%%"XDI", %%"XCX", 2)", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "(%%"XSI")")        // down, up
+        MERGE4PIXavgH("(%%"XDI")", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "(%%"XSI", %%"XCX", 2)")        // up, down
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc b/gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc
new file mode 100644 (file)
index 0000000..c1d2b5b
--- /dev/null
@@ -0,0 +1,322 @@
+// -*- c++ -*-
+               
+               // First, get and save our possible Bob values
+               // Assume our pixels are laid out as follows with x the calc'd bob value
+               // and the other pixels are from the current field
+               //  
+               //                j a b c k             current field
+               //            x                 calculated line
+               //        m d e f n             current field
+               //
+               // we calc the bob value luma value as:
+        // if |j - n| < Thres && |a - m| > Thres 
+        //  avg(j,n)
+        // end if
+        // if |k - m| < Thres && |c - n| > Thres 
+        //  avg(k,m)
+        // end if
+        // if |c - d| < Thres && |b - f| > Thres 
+        //  avg(c,d)
+        // end if
+        // if |a - f| < Thres && |b - d| > Thres 
+        //  avg(a,f)
+        // end if
+        // if |b - e| < Thres
+        //  avg(b,e)
+        // end if
+        // pickup any thing not yet set with avg(b,e)
+
+               // j, n
+        "pxor %%mm5, %%mm5\n\t"
+        "pxor %%mm6, %%mm6\n\t"
+        "pxor %%mm7, %%mm7\n\t"
+
+               "movq    -2(%%"XBX"), %%mm0\n\t"                // value a from top left                
+               "movq    -4(%%"XBX", %%"XCX"), %%mm1\n\t"       // value m from bottom right                    
+        
+               "movq   %%mm0, %%mm3\n\t"
+               "psubusb        %%mm1, %%mm3\n\t"
+               "psubusb %%mm0, %%mm1\n\t"
+               "por            %%mm1, %%mm3\n\t"                                       // abs(a,m)
+
+               "psubusb "_DiffThres", %%mm3\n\t"               // nonzero where abs(a,m) > Thres else 0
+               "pxor   %%mm4, %%mm4\n\t"
+               "pcmpeqb %%mm4, %%mm3\n\t"                      // now ff where abs(a,m) < Thres, else 00       
+               "pcmpeqb        %%mm3, %%mm4\n\t"                       // here ff where abs(a,m) > Thres, else 00
+
+
+               "movq    -4(%%"XBX"), %%mm0\n\t"                // value j
+               "movq    4(%%"XBX", %%"XCX"), %%mm1\n\t"        // value n
+               "movq   %%mm0, %%mm2\n\t"                                       
+               "pavgb  %%mm1, %%mm2\n\t"                                       // avg(j,n)
+        "movq  %%mm0, %%mm3\n\t"
+               "psubusb        %%mm1, %%mm0\n\t"
+               "psubusb %%mm3, %%mm1\n\t"
+               "por            %%mm1, %%mm0\n\t"                                       // abs(j,n)
+
+        "movq    %%mm0, %%mm1\n\t"
+               "psubusb "_DiffThres", %%mm1\n\t"               // nonzero where abs(j,n) > Thres else 0
+               "pxor   %%mm3, %%mm3\n\t"
+               "pcmpeqb %%mm3, %%mm1\n\t"                      // now ff where abs(j,n) < Thres, else 00       
+
+        "pand    %%mm4, %%mm1\n\t"
+        
+        "pand    %%mm1, %%mm2\n\t"
+        "pand    %%mm1, %%mm0\n\t"
+
+        "movq    %%mm1, %%mm3\n\t"
+        "pxor    %%mm5, %%mm3\n\t"
+        "pand    %%mm3, %%mm6\n\t"
+        "pand    %%mm3, %%mm7\n\t"
+        "pand    %%mm3, %%mm5\n\t"
+
+        "por     %%mm1, %%mm5\n\t"
+        "por     %%mm2, %%mm6\n\t"
+        "por     %%mm0, %%mm7\n\t"
+        
+        // k & m
+               "movq    2(%%"XBX"), %%mm0\n\t"         // value c from top left                
+               "movq    4(%%"XBX", %%"XCX"), %%mm1\n\t"        // value n from bottom right                    
+
+               "movq   %%mm0, %%mm3\n\t"
+               "psubusb        %%mm1, %%mm3\n\t"
+               "psubusb %%mm0, %%mm1\n\t"
+               "por            %%mm1, %%mm3\n\t"                                       // abs(c,n)
+
+               "psubusb "_DiffThres", %%mm3\n\t"               // nonzero where abs(c,n) > Thres else 0
+               "pxor   %%mm4, %%mm4\n\t"
+               "pcmpeqb %%mm4, %%mm3\n\t"                      // now ff where abs(c,n) < Thres, else 00       
+               "pcmpeqb        %%mm3, %%mm4\n\t"                       // here ff where abs(c,n) > Thres, else 00
+
+
+               "movq    4(%%"XBX"), %%mm0\n\t"         // value k
+               "movq    -4(%%"XBX", %%"XCX"), %%mm1\n\t"       // value m
+               "movq   %%mm0, %%mm2\n\t"                                       
+               V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(k,m)
+        "movq  %%mm0, %%mm3\n\t"
+               "psubusb        %%mm1, %%mm0\n\t"
+               "psubusb %%mm3, %%mm1\n\t"
+               "por            %%mm1, %%mm0\n\t"                                       // abs(k,m)
+
+        "movq    %%mm0, %%mm1\n\t"
+               "psubusb "_DiffThres", %%mm1\n\t"               // nonzero where abs(k,m) > Thres else 0
+               "pxor   %%mm3, %%mm3\n\t"
+               "pcmpeqb %%mm3, %%mm1\n\t"                      // now ff where abs(k,m) < Thres, else 00       
+
+        "pand    %%mm4, %%mm1\n\t"
+        
+        "pand    %%mm1, %%mm2\n\t"
+        "pand    %%mm1, %%mm0\n\t"
+
+        "movq    %%mm1, %%mm3\n\t"
+        "pxor    %%mm5, %%mm3\n\t"
+        "pand    %%mm3, %%mm6\n\t"
+        "pand    %%mm3, %%mm7\n\t"
+        "pand    %%mm3, %%mm5\n\t"
+
+        "por     %%mm1, %%mm5\n\t"
+        "por     %%mm2, %%mm6\n\t"
+        "por     %%mm0, %%mm7\n\t"
+
+
+        // c & d
+               "movq    (%%"XBX"), %%mm0\n\t"          // value b from top left                
+               "movq    2(%%"XBX", %%"XCX"), %%mm1\n\t"        // value f from bottom right                    
+
+               "movq   %%mm0, %%mm3\n\t"
+               "psubusb        %%mm1, %%mm3\n\t"
+               "psubusb %%mm0, %%mm1\n\t"
+               "por            %%mm1, %%mm3\n\t"                                       // abs(b,f)
+
+               "psubusb "_DiffThres", %%mm3\n\t"               // nonzero where abs(b,f) > Thres else 0
+               "pxor   %%mm4, %%mm4\n\t"
+               "pcmpeqb %%mm4, %%mm3\n\t"                      // now ff where abs(b,f) < Thres, else 00       
+               "pcmpeqb        %%mm3, %%mm4\n\t"                       // here ff where abs(b,f) > Thres, else 00
+
+               "movq    2(%%"XBX"), %%mm0\n\t"         // value c
+               "movq    -2(%%"XBX", %%"XCX"), %%mm1\n\t"       // value d
+               "movq   %%mm0, %%mm2\n\t"                                       
+               V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(c,d)
+        "movq  %%mm0, %%mm3\n\t"
+               "psubusb        %%mm1, %%mm0\n\t"
+               "psubusb %%mm3, %%mm1\n\t"
+               "por            %%mm1, %%mm0\n\t"                                       // abs(c,d)
+
+        "movq    %%mm0, %%mm1\n\t"
+               "psubusb "_DiffThres", %%mm1\n\t"               // nonzero where abs(c,d) > Thres else 0
+               "pxor   %%mm3, %%mm3\n\t"
+        "pcmpeqb %%mm3, %%mm1\n\t"                     // now ff where abs(c,d) < Thres, else 00       
+
+        "pand    %%mm4, %%mm1\n\t"
+
+        "pand    %%mm1, %%mm2\n\t"
+        "pand    %%mm1, %%mm0\n\t"
+
+        "movq    %%mm1, %%mm3\n\t"
+        "pxor    %%mm5, %%mm3\n\t"
+        "pand    %%mm3, %%mm6\n\t"
+        "pand    %%mm3, %%mm7\n\t"
+        "pand    %%mm3, %%mm5\n\t"
+
+        "por     %%mm1, %%mm5\n\t"
+        "por     %%mm2, %%mm6\n\t"
+        "por     %%mm0, %%mm7\n\t"
+
+        // a & f
+               "movq    (%%"XBX"), %%mm0\n\t"          // value b from top left                
+               "movq    -2(%%"XBX", %%"XCX"), %%mm1\n\t"       // value d from bottom right                    
+
+               "movq   %%mm0, %%mm3\n\t"
+               "psubusb        %%mm1, %%mm3\n\t"
+               "psubusb %%mm0, %%mm1\n\t"
+               "por            %%mm1, %%mm3\n\t"                                       // abs(b,d)
+
+               "psubusb "_DiffThres", %%mm3\n\t"       // nonzero where abs(b,d) > Thres else 0
+               "pxor   %%mm4, %%mm4\n\t"
+               "pcmpeqb %%mm4, %%mm3\n\t"                      // now ff where abs(b,d) < Thres, else 00       
+               "pcmpeqb        %%mm3, %%mm4\n\t"                       // here ff where abs(b,d) > Thres, else 00
+
+               "movq    -2(%%"XBX"), %%mm0\n\t"                // value a
+               "movq    2(%%"XBX", %%"XCX"), %%mm1\n\t"        // value f
+               "movq   %%mm0, %%mm2\n\t"                                       
+               V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(a,f)
+        "movq  %%mm0, %%mm3\n\t"
+        "psubusb       %%mm1, %%mm0\n\t"
+               "psubusb %%mm3, %%mm1\n\t"
+               "por            %%mm1, %%mm0\n\t"                                       // abs(a,f)
+
+        "movq    %%mm0, %%mm1\n\t"
+               "psubusb "_DiffThres", %%mm1\n\t"               // nonzero where abs(a,f) > Thres else 0
+               "pxor   %%mm3, %%mm3\n\t"
+               "pcmpeqb %%mm3, %%mm1\n\t"                      // now ff where abs(a,f) < Thres, else 00       
+
+        "pand    %%mm4, %%mm1\n\t"
+
+        "pand    %%mm1, %%mm2\n\t"
+        "pand    %%mm1, %%mm0\n\t"
+            
+        "movq    %%mm1, %%mm3\n\t"
+        "pxor    %%mm5, %%mm3\n\t"
+        "pand    %%mm3, %%mm6\n\t"
+        "pand    %%mm3, %%mm7\n\t"
+        "pand    %%mm3, %%mm5\n\t"
+
+        "por     %%mm1, %%mm5\n\t"
+        "por     %%mm2, %%mm6\n\t"
+        "por     %%mm0, %%mm7\n\t"
+           
+               "pand   "_YMask", %%mm5\n\t"            // mask out chroma from here
+               "pand   "_YMask", %%mm6\n\t"                    // mask out chroma from here
+               "pand   "_YMask", %%mm7\n\t"                    // mask out chroma from here
+
+               // b,e
+               "movq    (%%"XBX"), %%mm0\n\t"          // value b from top             
+               "movq    (%%"XBX", %%"XCX"), %%mm1\n\t" // value e from bottom 
+               "movq   %%mm0, %%mm2\n\t"                                       
+               V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(b,e)
+        "movq  %%mm0, %%mm3\n\t"
+        "psubusb       %%mm1, %%mm0\n\t"
+               "psubusb %%mm3, %%mm1\n\t"
+               "por            %%mm1, %%mm0\n\t"                                       // abs(b,e)
+
+        "movq    %%mm0, %%mm1\n\t"
+               "psubusb "_DiffThres", %%mm1\n\t"               // nonzero where abs(b,e) > Thres else 0
+               "pxor   %%mm3, %%mm3\n\t"
+               "pcmpeqb %%mm3, %%mm1\n\t"              // now ff where abs(b,e) < Thres, else 00       
+
+        "pand    %%mm1, %%mm2\n\t"
+        "pand    %%mm1, %%mm0\n\t"
+
+        "movq    %%mm1, %%mm3\n\t"
+        "pxor    %%mm5, %%mm3\n\t"
+        "pand    %%mm3, %%mm6\n\t"
+        "pand    %%mm3, %%mm7\n\t"
+        "pand    %%mm3, %%mm5\n\t"
+
+        "por     %%mm1, %%mm5\n\t"
+        "por     %%mm2, %%mm6\n\t"
+        "por     %%mm0, %%mm7\n\t"
+
+               // bob in any leftovers
+               "movq    (%%"XBX"), %%mm0\n\t"          // value b from top             
+               "movq    (%%"XBX", %%"XCX"), %%mm1\n\t" // value e from bottom 
+
+
+// We will also calc here the max/min values to later limit comb
+// so the max excursion will not exceed the Max_Comb constant
+
+#ifdef SKIP_SEARCH             
+               "movq   %%mm0, %%mm2\n\t"
+//             pminub  %%mm2, %%mm1
+               V_PMINUB ("%%mm2", "%%mm1", "%%mm4")
+
+//             pmaxub  %%mm6, %%mm2                    // clip our current results so far to be above this
+               V_PMAXUB ("%%mm6", "%%mm2")
+        "movq  %%mm0, %%mm2\n\t"
+               V_PMAXUB ("%%mm2", "%%mm1")
+//             pminub  %%mm6, %%mm2                    // clip our current results so far to be below this
+               V_PMINUB ("%%mm6", "%%mm2", "%%mm4")
+
+#else
+        "movq  %%mm0, %%mm2\n\t"
+               "movq   (%%"XAX"), %%mm4\n\t"
+               "psubusb %%mm4, %%mm2\n\t"
+               "psubusb %%mm0, %%mm4\n\t"
+               "por            %%mm2, %%mm4\n\t"                       // abs diff
+               
+               "movq   %%mm1, %%mm2\n\t"
+               "movq   (%%"XAX", %%"XCX"), %%mm3\n\t"
+               "psubusb %%mm3, %%mm2\n\t"
+               "psubusb %%mm1, %%mm3\n\t"
+               "por            %%mm2, %%mm3\n\t"                       // abs diff
+//             pmaxub  %%mm3, %%mm4                    // top or bottom pixel moved most
+               V_PMAXUB ("%%mm3", "%%mm4")                     // top or bottom pixel moved most
+        "psubusb "_DiffThres", %%mm3\n\t"              // moved more than allowed? or goes to 0?
+               "pxor   %%mm4, %%mm4\n\t"
+               "pcmpeqb %%mm4, %%mm3\n\t"                      // now ff where low motion, else high motion
+               
+               "movq   %%mm0, %%mm2\n\t"
+//             pminub  %%mm2, %%mm1
+               V_PMINUB ("%%mm2", "%%mm1", "%%mm4")
+
+//             pmaxub  %%mm6, %%mm2                    // clip our current results so far to be above this
+               V_PMAXUB ("%%mm6", "%%mm2")
+
+        "psubusb %%mm3, %%mm2\n\t"                     // maybe decrease it to 0000.. if no surround motion
+               "movq   %%mm2, "_Min_Vals"\n\t"
+
+               "movq   %%mm0, %%mm2\n\t"
+               V_PMAXUB ("%%mm2", "%%mm1")
+//             pminub  %%mm6, %%mm2                    // clip our current results so far to be below this
+               V_PMINUB ("%%mm6", "%%mm2", "%%mm4")
+        "paddusb %%mm3, %%mm2\n\t"                     // maybe increase it to ffffff if no surround motion
+               "movq   %%mm2, "_Max_Vals"\n\t"
+#endif
+                       
+               "movq   %%mm0, %%mm2\n\t"                                               
+//             pavgb   %%mm2, %%mm1                                    // avg(b,e)
+               V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(b,e)
+                               
+        "movq  %%mm0, %%mm3\n\t"
+               "psubusb        %%mm1, %%mm3\n\t"
+               "psubusb %%mm0, %%mm1\n\t"
+               "por            %%mm1, %%mm3\n\t"                       // abs(b,e)
+               "movq   %%mm3, %%mm1\n\t"                       // keep copy of diffs
+            
+               "pxor   %%mm4, %%mm4\n\t"                       
+               "psubusb %%mm7, %%mm3\n\t"                      // nonzero where new weights bigger, else 0
+               "pcmpeqb %%mm4, %%mm3\n\t"                      // now ff where new better, else 00     
+        "pcmpeqb %%mm0, %%mm0\n\t"
+        "pandn   %%mm0, %%mm5\n\t"
+        "por     %%mm5, %%mm3\n\t"
+               "pcmpeqb        %%mm3, %%mm4\n\t"                       // here ff where old better, else 00
+
+               "pand   %%mm3, %%mm1\n\t"
+               "pand   %%mm3, %%mm2\n\t"
+        
+               "pand    %%mm4, %%mm6\n\t"
+               "pand    %%mm4, %%mm7\n\t"
+
+               "por            %%mm2, %%mm6\n\t"                       // our x2 value
+               "por            %%mm1, %%mm7\n\t"                       // our x2 diffs
+               "movq   %%mm7, %%mm4\n\t"                       // save as bob uncertainty indicator
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc b/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc
new file mode 100644 (file)
index 0000000..f046f5e
--- /dev/null
@@ -0,0 +1,220 @@
+/*
+ * GStreamer
+ * Copyright (c) 2002 Tom Barry  All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Relicensed for GStreamer from GPL to LGPL with permit from Tom Barry.
+ * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
+ */
+
+
+#ifndef TopFirst
+#define TopFirst IsOdd
+#endif
+
+#ifdef SEFUNC
+#undef SEFUNC
+#endif
+
+#if defined(IS_SSE)
+#define SEFUNC(x) Search_Effort_SSE_##x()
+#elif defined(IS_3DNOW)
+#define SEFUNC(x) Search_Effort_3DNOW_##x()
+#else
+#define SEFUNC(x) Search_Effort_MMX_##x()
+#endif
+
+#include "TomsMoCompAll2.inc"
+
+#define USE_STRANGE_BOB
+
+#include "TomsMoCompAll2.inc"
+
+#undef USE_STRANGE_BOB
+
+
+void FUNCT_NAME(GstDeinterlace2* object)
+{
+  pMyMemcpy = object->pMemcpy;  /* memcpy routine supplied by the element */
+  
+  /* double stride to address just every odd/even scanline */
+  src_pitch = object->field_stride;
+  dst_pitch = object->output_stride;
+  rowsize   = object->line_length;
+  FldHeight = object->field_height;
+
+  pCopySrc   = GST_BUFFER_DATA(object->field_history[object->history_count-1].buf);
+  pCopySrcP  = GST_BUFFER_DATA(object->field_history[object->history_count-3].buf);
+  pWeaveSrc  = GST_BUFFER_DATA(object->field_history[object->history_count-2].buf);  
+  pWeaveSrcP = GST_BUFFER_DATA(object->field_history[object->history_count-4].buf);  /* NOTE(review): indexing assumes history_count >= 4 */
+
+  /* use bottom field and interlace top field */
+  if (object->field_history[object->history_count-2].flags == PICTURE_INTERLACED_BOTTOM) {
+    IsOdd      = 1;
+
+    // if we have an odd field we copy an even field and weave an odd field
+    pCopyDest = GST_BUFFER_DATA(object->out_buf);
+    pWeaveDest = pCopyDest + dst_pitch;
+  }
+  /* do it vice versa */
+  else {
+
+    IsOdd      = 0;
+    // if we have an even field we copy an odd field and weave an even field
+    pCopyDest = GST_BUFFER_DATA(object->out_buf) + dst_pitch;
+    pWeaveDest = GST_BUFFER_DATA(object->out_buf);
+  }
+
+  
+#ifdef IS_SSE2
+  // SSE2 support temporarily deleted
+#endif
+
+  // copy 1st and last weave lines 
+  Fieldcopy(pWeaveDest, pCopySrc, rowsize,             
+           1, dst_pitch*2, src_pitch);
+  Fieldcopy(pWeaveDest+(FldHeight-1)*dst_pitch*2,
+           pCopySrc+(FldHeight-1)*src_pitch, rowsize, 
+           1, dst_pitch*2, src_pitch);
+  
+#ifdef USE_VERTICAL_FILTER
+  // Vertical Filter currently not implemented for DScaler !!
+  // copy 1st and last lines the copy field
+  Fieldcopy(pCopyDest, pCopySrc, rowsize, 
+           1, dst_pitch*2, src_pitch);
+  Fieldcopy(pCopyDest+(FldHeight-1)*dst_pitch*2,
+           pCopySrc+(FldHeight-1)*src_pitch, rowsize, 
+           1, dst_pitch*2, src_pitch);
+#else
+  
+  // copy all of the copy field
+  Fieldcopy(pCopyDest, pCopySrc, rowsize, 
+           FldHeight, dst_pitch*2, src_pitch);
+#endif 
+  // then go fill in the hard part, being variously lazy depending upon
+  // SearchEffort
+
+  if(!UseStrangeBob) {
+    if (SearchEffort == 0)
+      {
+       SEFUNC(0);
+      }
+    else if (SearchEffort <= 1)
+      {
+       SEFUNC(1);
+      }
+    /* else if (SearchEffort <= 2)
+       {
+       SEFUNC(2);
+       }
+    */
+    else if (SearchEffort <= 3)
+      {
+       SEFUNC(3);
+      }
+    else if (SearchEffort <= 5)
+      {
+       SEFUNC(5);
+      }
+    else if (SearchEffort <= 9)
+      {
+       SEFUNC(9);
+      }
+    else if (SearchEffort <= 11)
+      {
+       SEFUNC(11);
+      }
+    else if (SearchEffort <= 13)
+      {
+       SEFUNC(13);
+      }
+    else if (SearchEffort <= 15)
+      {
+       SEFUNC(15);
+      }
+    else if (SearchEffort <= 19)
+      {
+       SEFUNC(19);
+      }
+    else if (SearchEffort <= 21)
+      {
+       SEFUNC(21);
+      }
+    else 
+      {
+       SEFUNC(Max);
+      }
+  }
+  else
+    {
+      if (SearchEffort == 0)
+       {
+         SEFUNC(0_SB);
+       }
+      else if (SearchEffort <= 1)
+       {
+         SEFUNC(1_SB);
+       }
+      /*       else if (SearchEffort <= 2)
+               {
+               SEFUNC(2_SB);
+               }
+      */
+      else if (SearchEffort <= 3)
+       {
+         SEFUNC(3_SB);
+       }
+      else if (SearchEffort <= 5)
+       {
+         SEFUNC(5_SB);
+       }
+      else if (SearchEffort <= 9)
+       {
+         SEFUNC(9_SB);
+       }
+      else if (SearchEffort <= 11)
+       {
+         SEFUNC(11_SB);
+       }
+      else if (SearchEffort <= 13)
+       {
+         SEFUNC(13_SB);
+       }
+      else if (SearchEffort <= 15)
+       {
+         SEFUNC(15_SB);
+       }
+      else if (SearchEffort <= 19)
+       {
+         SEFUNC(19_SB);
+       }
+      else if (SearchEffort <= 21)
+       {
+         SEFUNC(21_SB);
+       }
+      else 
+       {
+         SEFUNC(Max_SB);
+       }
+    }
+
+#ifdef ARCH_386
+  __asm__ __volatile__("emms");  /* clear MMX state so subsequent x87 FPU code works */
+#endif
+}
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc b/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc
new file mode 100644 (file)
index 0000000..baf1a1a
--- /dev/null
@@ -0,0 +1,174 @@
+// -*- c++ -*-
+
+#ifdef SEARCH_EFFORT_FUNC
+#undef SEARCH_EFFORT_FUNC
+#endif
+
+#ifdef USE_STRANGE_BOB
+#define SEARCH_EFFORT_FUNC(n) SEFUNC(n##_SB)   // strange-bob builds get a _SB name suffix
+#else
+#define SEARCH_EFFORT_FUNC(n) SEFUNC(n)
+#endif
+
+int SEARCH_EFFORT_FUNC(0)              // we don't try at all ;-)
+{
+               //see Search_Effort_Max() for comments
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+}
+
+int SEARCH_EFFORT_FUNC(1)
+{
+               //see Search_Effort_Max() for comments
+#include "SearchLoopTop.inc"
+       RESET_CHROMA            // pretend chroma diffs were 255 each
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+}
+
+int SEARCH_EFFORT_FUNC(3)
+{
+               //see Search_Effort_Max() for comments
+#include "SearchLoopTop.inc"
+#include "SearchLoopOddA2.inc"
+       RESET_CHROMA            // pretend chroma diffs were 255 each
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+}
+
+int SEARCH_EFFORT_FUNC(5)
+{
+               //see Search_Effort_Max() for comments
+#include "SearchLoopTop.inc"
+#include "SearchLoopOddA2.inc"
+#include "SearchLoopOddAH2.inc"
+       RESET_CHROMA            // pretend chroma diffs were 255 each
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+}
+
+// 3x3 search
+int SEARCH_EFFORT_FUNC(9)
+{
+               //see Search_Effort_Max() for comments
+#include "SearchLoopTop.inc"
+#include "SearchLoopOddA.inc"
+       RESET_CHROMA                    // pretend chroma diffs were 255 each
+#include "SearchLoopVA.inc"
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+}
+
+// Search 9 with 2 H-half pels added
+int SEARCH_EFFORT_FUNC(11)
+{
+               //see Search_Effort_Max() for comments
+#include "SearchLoopTop.inc"
+#include "SearchLoopOddA.inc"
+#include "SearchLoopOddAH2.inc"
+       RESET_CHROMA                    // pretend chroma diffs were 255 each
+#include "SearchLoopVA.inc"
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+}
+
+// Search 11 with 2 V-half pels added
+int SEARCH_EFFORT_FUNC(13)
+{
+               //see Search_Effort_Max() for comments
+#include "SearchLoopTop.inc"
+#include "SearchLoopOddA.inc"
+#include "SearchLoopOddAH2.inc"
+       RESET_CHROMA                    // pretend chroma diffs were 255 each
+#include "SearchLoopVAH.inc"
+#include "SearchLoopVA.inc"
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+}
+
+// 5x3
+int SEARCH_EFFORT_FUNC(15)
+{
+               //see Search_Effort_Max() for comments
+#include "SearchLoopTop.inc"
+#include "SearchLoopOddA.inc"
+       RESET_CHROMA                    // pretend chroma diffs were 255 each
+#include "SearchLoopEdgeA.inc"
+#include "SearchLoopVA.inc"
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+}
+
+// 5x3 + 4 half pels
+int SEARCH_EFFORT_FUNC(19)
+{
+               //see Search_Effort_Max() for comments
+#include "SearchLoopTop.inc"
+#include "SearchLoopOddA.inc"
+#include "SearchLoopOddAH2.inc"
+       RESET_CHROMA                    // pretend chroma diffs were 255 each
+#include "SearchLoopEdgeA.inc"
+#include "SearchLoopVAH.inc"
+#include "SearchLoopVA.inc"
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+}
+
+// Handle one 4x1 block of pixels
+// Search a 7x3 area, no half pels
+
+int SEARCH_EFFORT_FUNC(21)
+{
+               //see SearchLoopTop.inc for comments
+#include "SearchLoopTop.inc"
+
+               // odd addresses -- the pixels at odd address wouldn't generate
+               // good luma values but we will mask those off
+
+#include "SearchLoopOddA6.inc"  // 4 odd v half pels, 3 to left & right
+#include "SearchLoopOddA.inc"   // 6 odd pels, 1 to left & right
+
+       RESET_CHROMA            // pretend chroma diffs were 255 each
+
+               // even addresses -- use both luma and chroma from these
+               // search averages of 2 pixels left and right
+#include "SearchLoopEdgeA.inc"
+               // search vertical line and averages, -1,0,+1
+#include "SearchLoopVA.inc"
+               // blend our results and loop
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+}
+
+// Handle one 4x1 block of pixels
+// Search a 9x3 area, no half pels
+int SEARCH_EFFORT_FUNC(Max)
+{
+               //see SearchLoopTop.inc for comments
+#include "SearchLoopTop.inc"
+
+               // odd addresses -- the pixels at odd address wouldn't generate
+               // good luma values but we will mask those off
+
+#include "SearchLoopOddA6.inc"  // 4 odd v half pels, 3 to left & right
+#include "SearchLoopOddA.inc"   // 6 odd pels, 1 to left & right
+
+       RESET_CHROMA            // pretend chroma diffs were 255 each
+
+               // even addresses -- use both luma and chroma from these
+               // search averages of 4 pixels left and right
+#include "SearchLoopEdgeA8.inc"
+               // search averages of 2 pixels left and right
+#include "SearchLoopEdgeA.inc"
+               // search vertical line and averages, -1,0,+1
+#include "SearchLoopVA.inc"
+               // blend our results and loop
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+}
+
+#undef SEARCH_EFFORT_FUNC
+
+
diff --git a/gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc b/gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc
new file mode 100644 (file)
index 0000000..36fd9d2
--- /dev/null
@@ -0,0 +1,189 @@
+// -*- c++ -*-
+
+               // First, get and save our possible Bob values
+               // Assume our pixels are laid out as follows with x the calc'd bob value
+               // and the other pixels are from the current field
+               //
+               //        j a b c k             current field
+               //            x                 calculated line
+               //        m d e f n             current field
+               //
+               // we calc the bob value as:
+               //              x2 = either avg(a,f), avg(c,d), avg(b,e), avg(j,n), or avg(k,m)
+
+               // selected for the smallest of abs(a-f), abs(c-d), abs(b-e), etc.
+
+               // a,f diagonal pair
+               "movq    -2(%%"XBX"), %%mm0\n\t"                // value a from top left
+               "movq    2(%%"XBX", %%"XCX"), %%mm1\n\t"        // value f from bottom right
+               "movq   %%mm0, %%mm6\n\t"
+//             pavgb   %%mm6, %%mm1                                    // avg(a,f), also best so far
+               V_PAVGB ("%%mm6", "%%mm1", "%%mm7", _ShiftMask) // mm6 = avg(a,f), best candidate so far
+        "movq  %%mm0, %%mm7\n\t"
+               "psubusb         %%mm1, %%mm7\n\t"              // unsigned-saturating a-f ...
+               "psubusb %%mm0, %%mm1\n\t"                      // ... and f-a ...
+               "por            %%mm1, %%mm7\n\t"                                       // mm7 = |a-f|, best diff so far
+
+               // c,d diagonal pair
+               "movq    2(%%"XBX"), %%mm0\n\t"         // value c from top right
+               "movq    -2(%%"XBX", %%"XCX"), %%mm1\n\t"       // value d from bottom left
+               "movq   %%mm0, %%mm2\n\t"
+//             pavgb   %%mm2, %%mm1                                    // avg(c,d)
+               V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // mm2 = avg(c,d)
+        "movq  %%mm0, %%mm3\n\t"
+               "psubusb        %%mm1, %%mm3\n\t"
+               "psubusb %%mm0, %%mm1\n\t"
+               "por            %%mm1, %%mm3\n\t"                                       // mm3 = |c-d|
+               "movq   %%mm3, %%mm1\n\t"                                       // keep copy
+
+               "psubusb %%mm7, %%mm3\n\t"                      // nonzero where new diffs bigger, else 0
+               "pxor   %%mm4, %%mm4\n\t"
+               "pcmpeqb %%mm4, %%mm3\n\t"                      // now ff where new better (smaller diff), else 00
+               "pcmpeqb        %%mm3, %%mm4\n\t"                       // here ff where old better, else 00
+
+               "pand   %%mm3, %%mm1\n\t"                       // keep only better new avg and abs diff
+               "pand   %%mm3, %%mm2\n\t"
+
+               "pand   %%mm4, %%mm6\n\t"                       // keep only better old avg and abs diff
+               "pand    %%mm4, %%mm7\n\t"
+
+               "por            %%mm2, %%mm6\n\t"                       // and merge new & old vals keeping best
+               "por            %%mm1, %%mm7\n\t"
+               "por            "_UVMask", %%mm7\n\t"                   // but we know chroma is worthless so far
+               "pand   "_YMask", %%mm5\n\t"                    // mask out chroma from here also
+
+               // j,n diagonal pair (outermost, left-top to right-bottom)
+               "movq    -4(%%"XBX"), %%mm0\n\t"                // value j from top left
+               "movq    4(%%"XBX", %%"XCX"), %%mm1\n\t"        // value n from bottom right
+               "movq   %%mm0, %%mm2\n\t"
+//             pavgb   %%mm2, %%mm1                                    // avg(j,n)
+               V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // mm2 = avg(j,n)
+        "movq  %%mm0, %%mm3\n\t"
+               "psubusb        %%mm1, %%mm3\n\t"
+               "psubusb %%mm0, %%mm1\n\t"
+               "por            %%mm1, %%mm3\n\t"                                       // mm3 = |j-n|
+               "movq   %%mm3, %%mm1\n\t"                                       // keep copy
+
+               "psubusb %%mm7, %%mm3\n\t"                      // nonzero where new diffs bigger, else 0
+               "pxor   %%mm4, %%mm4\n\t"
+               "pcmpeqb %%mm4, %%mm3\n\t"                      // now ff where new better, else 00
+               "pcmpeqb        %%mm3, %%mm4\n\t"                       // here ff where old better, else 00
+
+               "pand   %%mm3, %%mm1\n\t"                       // keep only better new avg and abs diff
+               "pand   %%mm2, %%mm3\n\t"                       // (same mask; masked avg lands in mm3 this time)
+
+               "pand   %%mm4, %%mm6\n\t"                       // keep only better old avg and abs diff
+               "pand    %%mm4, %%mm7\n\t"
+
+               "por            %%mm3, %%mm6\n\t"                       // and merge new & old vals keeping best
+               "por            %%mm1, %%mm7\n\t"                       // "
+
+               // k,m diagonal pair (outermost, right-top to left-bottom)
+               "movq    4(%%"XBX"), %%mm0\n\t"         // value k from top right
+               "movq    -4(%%"XBX", %%"XCX"), %%mm1\n\t"       // value m from bottom left
+               "movq   %%mm0, %%mm4\n\t"
+//             pavgb   %%mm4, %%mm1                                    // avg(k,m)
+               V_PAVGB ("%%mm4", "%%mm1", "%%mm3", _ShiftMask) // mm4 = avg(k,m)
+
+        "movq  %%mm0, %%mm3\n\t"
+               "psubusb        %%mm1, %%mm3\n\t"
+               "psubusb %%mm0, %%mm1\n\t"
+               "por            %%mm1, %%mm3\n\t"                                       // mm3 = |k-m|
+               "movq   %%mm3, %%mm1\n\t"                                       // keep copy
+
+               "movq   %%mm4, %%mm2\n\t"                       // avg(k,m); mm4 is reused as zero just below
+
+               "psubusb %%mm7, %%mm3\n\t"                      // nonzero where new diffs bigger, else 0
+               "pxor   %%mm4, %%mm4\n\t"
+               "pcmpeqb %%mm4, %%mm3\n\t"                      // now ff where new better, else 00
+               "pcmpeqb        %%mm3, %%mm4\n\t"                       // here ff where old better, else 00
+
+               "pand   %%mm3, %%mm1\n\t"                       // keep only better new avg and abs diff
+               "pand   %%mm2, %%mm3\n\t"
+
+               "pand   %%mm4, %%mm6\n\t"                       // keep only better old avg and abs diff
+               "pand    %%mm4, %%mm7\n\t"
+
+               "por            %%mm3, %%mm6\n\t"                       // and merge new & old vals keeping best
+               "por            %%mm1, %%mm7\n\t"                       // "
+
+               // b,e vertical pair (straight up/down through x)
+               "movq    (%%"XBX"), %%mm0\n\t"          // value b from top
+               "movq    (%%"XBX", %%"XCX"), %%mm1\n\t" // value e from bottom
+
+// We will also calc here the max/min values to later limit comb
+// so the max excursion will not exceed the Max_Comb constant
+
+#ifdef SKIP_SEARCH
+               "movq   %%mm0, %%mm2\n\t"
+//             pminub  %%mm2, %%mm1
+               V_PMINUB ("%%mm2", "%%mm1", "%%mm4")
+
+//             pmaxub  %%mm6, %%mm2                    // clip our current results so far to be above this
+               V_PMAXUB ("%%mm6", "%%mm2")
+               "movq   %%mm0, %%mm2\n\t"
+               V_PMAXUB ("%%mm2", "%%mm1")
+//             pminub  %%mm6, %%mm2                    // clip our current results so far to be below this
+               V_PMINUB ("%%mm6", "%%mm2", "%%mm4")
+
+#else
+        "movq  %%mm0, %%mm2\n\t"
+               "movq   (%%"XAX"), %%mm4\n\t"           // same-position pixel from the other frame/field (via XAX)
+               "psubusb %%mm4, %%mm2\n\t"
+               "psubusb %%mm0, %%mm4\n\t"
+               "por            %%mm2, %%mm4\n\t"                       // abs diff of top pixel vs. reference
+
+               "movq   %%mm1, %%mm2\n\t"
+               "movq   (%%"XAX", %%"XCX"), %%mm3\n\t"
+               "psubusb %%mm3, %%mm2\n\t"
+               "psubusb %%mm1, %%mm3\n\t"
+               "por            %%mm2, %%mm3\n\t"                       // abs diff of bottom pixel vs. reference
+//             pmaxub  %%mm3, %%mm4                    // top or bottom pixel moved most
+               V_PMAXUB ("%%mm3", "%%mm4")                     // top or bottom pixel moved most
+        "psubusb "_Max_Mov", %%mm3\n\t"                // moved more than allowed? or goes to 0?
+               "pxor   %%mm4, %%mm4\n\t"
+               "pcmpeqb %%mm4, %%mm3\n\t"                      // now ff where low motion, else high motion
+
+               "movq   %%mm0, %%mm2\n\t"
+//             pminub  %%mm2, %%mm1
+               V_PMINUB ("%%mm2", "%%mm1", "%%mm4")
+
+//             pmaxub  %%mm6, %%mm2                    // clip our current results so far to be above this
+               V_PMAXUB ("%%mm6", "%%mm2")
+
+               "psubusb %%mm3, %%mm2\n\t"                      // maybe decrease it to 0000.. if no surround motion
+               "movq   %%mm2, "_Min_Vals"\n\t"
+
+               "movq   %%mm0, %%mm2\n\t"
+               V_PMAXUB ("%%mm2", "%%mm1")
+//             pminub  %%mm6, %%mm2                    // clip our current results so far to be below this
+               V_PMINUB ("%%mm6", "%%mm2", "%%mm4")
+        "paddusb %%mm3, %%mm2\n\t"                     // maybe increase it to ffffff if no surround motion
+               "movq   %%mm2, "_Max_Vals"\n\t"
+#endif
+
+               "movq   %%mm0, %%mm2\n\t"
+//             pavgb   %%mm2, %%mm1                                    // avg(b,e)
+               V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // mm2 = avg(b,e)
+
+        "movq  %%mm0, %%mm3\n\t"
+               "psubusb        %%mm1, %%mm3\n\t"
+               "psubusb %%mm0, %%mm1\n\t"
+               "por            %%mm1, %%mm3\n\t"                       // mm3 = |b-e|
+               "movq   %%mm3, %%mm1\n\t"                       // keep copy of diffs
+
+               "pxor   %%mm4, %%mm4\n\t"
+               "psubusb %%mm7, %%mm3\n\t"                      // nonzero where new diffs bigger, else 0
+               "pcmpeqb %%mm4, %%mm3\n\t"                      // now ff where new better, else 00
+               "pcmpeqb        %%mm3, %%mm4\n\t"                       // here ff where old better, else 00
+
+               "pand   %%mm3, %%mm1\n\t"                       // keep only better new avg and abs diff
+               "pand   %%mm3, %%mm2\n\t"
+
+               "pand    %%mm4, %%mm6\n\t"                      // keep only better old avg and abs diff
+               "pand    %%mm4, %%mm7\n\t"
+
+               "por            %%mm2, %%mm6\n\t"                       // our x2 value
+               "por            %%mm1, %%mm7\n\t"                       // our x2 diffs
+               "movq   %%mm7, %%mm4\n\t"                       // save as bob uncertainty indicator
+