From 4f836b7732ccaf0421081cbe4198b97dabd60513 Mon Sep 17 00:00:00 2001
From: Martin Eikermann
Date: Wed, 11 Jun 2008 11:12:49 +0000
Subject: [PATCH] gst/deinterlace2/: Add a deinterlacer plugin based on the
 tvtime/DScaler deinterlacer, which was relicensed to LGPL f...

Original commit message from CVS:
Based on a patch by: Martin Eikermann

* gst/deinterlace2/Makefile.am:
* gst/deinterlace2/gstdeinterlace2.c: (gst_deinterlace2_method_get_type),
(gst_deinterlace2_fields_get_type), (gst_deinterlace2_field_layout_get_type),
(gst_deinterlace2_base_init), (gst_deinterlace2_class_init),
(gst_deinterlace2_init), (gst_deinterlace2_set_method),
(gst_deinterlace2_set_property), (gst_deinterlace2_get_property),
(gst_deinterlace2_finalize), (gst_deinterlace2_pop_history),
(gst_deinterlace2_head_history), (gst_deinterlace2_push_history),
(gst_deinterlace2_deinterlace_scanlines), (gst_deinterlace2_chain),
(gst_deinterlace2_setcaps), (gst_deinterlace2_sink_event),
(gst_deinterlace2_change_state), (gst_deinterlace2_src_event),
(gst_deinterlace2_src_query), (gst_deinterlace2_src_query_types),
(plugin_init):
* gst/deinterlace2/gstdeinterlace2.h:
* gst/deinterlace2/tvtime/greedy.c: (copy_scanline),
(deinterlace_greedy_packed422_scanline_mmxext),
(dscaler_greedyl_get_method):
* gst/deinterlace2/tvtime/greedyh.asm:
* gst/deinterlace2/tvtime/greedyh.c: (deinterlace_frame_di_greedyh),
(dscaler_greedyh_get_method), (greedyh_init), (greedyh_filter_mmx),
(greedyh_filter_3dnow), (greedyh_filter_sse):
* gst/deinterlace2/tvtime/greedyh.h:
* gst/deinterlace2/tvtime/greedyhmacros.h:
* gst/deinterlace2/tvtime/mmx.h:
* gst/deinterlace2/tvtime/plugins.h:
* gst/deinterlace2/tvtime/speedtools.h:
* gst/deinterlace2/tvtime/speedy.c: (multiply_alpha), (clip255),
(comb_factor_packed422_scanline_mmx), (diff_factor_packed422_scanline_c),
(diff_factor_packed422_scanline_mmx), (diff_packed422_block8x8_mmx),
(diff_packed422_block8x8_c), (packed444_to_packed422_scanline_c),
(packed422_to_packed444_scanline_c),
(packed422_to_packed444_rec601_scanline_c),
(vfilter_chroma_121_packed422_scanline_mmx),
(vfilter_chroma_121_packed422_scanline_c),
(vfilter_chroma_332_packed422_scanline_mmx),
(vfilter_chroma_332_packed422_scanline_c),
(kill_chroma_packed422_inplace_scanline_mmx),
(kill_chroma_packed422_inplace_scanline_c),
(invert_colour_packed422_inplace_scanline_mmx),
(invert_colour_packed422_inplace_scanline_c),
(mirror_packed422_inplace_scanline_c), (interpolate_packed422_scanline_c),
(convert_uyvy_to_yuyv_scanline_mmx), (convert_uyvy_to_yuyv_scanline_c),
(interpolate_packed422_scanline_mmx),
(interpolate_packed422_scanline_mmxext),
(blit_colour_packed422_scanline_c), (blit_colour_packed422_scanline_mmx),
(blit_colour_packed422_scanline_mmxext),
(blit_colour_packed4444_scanline_c), (blit_colour_packed4444_scanline_mmx),
(blit_colour_packed4444_scanline_mmxext), (small_memcpy),
(speedy_memcpy_c), (speedy_memcpy_mmx), (speedy_memcpy_mmxext),
(blit_packed422_scanline_c), (blit_packed422_scanline_mmx),
(blit_packed422_scanline_mmxext),
(composite_colour4444_alpha_to_packed422_scanline_c),
(composite_colour4444_alpha_to_packed422_scanline_mmxext),
(composite_packed4444_alpha_to_packed422_scanline_c),
(composite_packed4444_alpha_to_packed422_scanline_mmxext),
(composite_packed4444_to_packed422_scanline_c),
(composite_packed4444_to_packed422_scanline_mmxext),
(composite_alphamask_to_packed4444_scanline_c),
(composite_alphamask_to_packed4444_scanline_mmxext),
(composite_alphamask_alpha_to_packed4444_scanline_c),
(premultiply_packed4444_scanline_c),
(premultiply_packed4444_scanline_mmxext), (blend_packed422_scanline_c),
(blend_packed422_scanline_mmxext),
(quarter_blit_vertical_packed422_scanline_mmxext),
(quarter_blit_vertical_packed422_scanline_c),
(subpix_blit_vertical_packed422_scanline_c), (a8_subpix_blit_scanline_c),
(myround), (init_RGB_to_YCbCr_tables), (init_YCbCr_to_RGB_tables),
(rgb24_to_packed444_rec601_scanline_c),
(rgba32_to_packed4444_rec601_scanline_c),
(packed444_to_rgb24_rec601_scanline_c),
(packed444_to_nonpremultiplied_packed4444_scanline_c),
(aspect_adjust_packed4444_scanline_c), (setup_speedy_calls),
(speedy_get_accel):
* gst/deinterlace2/tvtime/speedy.h:
* gst/deinterlace2/tvtime/sse.h:
* gst/deinterlace2/tvtime/tomsmocomp.c: (Fieldcopy),
(deinterlace_frame_di_tomsmocomp), (dscaler_tomsmocomp_get_method),
(tomsmocomp_init), (tomsmocomp_filter_mmx), (tomsmocomp_filter_3dnow),
(tomsmocomp_filter_sse):
* gst/deinterlace2/tvtime/tomsmocomp.h:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoop0A.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA8.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA6.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVA.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVAH.inc:
* gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc:
* gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc:
* gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc:
* gst/deinterlace2/tvtime/vfir.c: (deinterlace_line),
(deinterlace_scanline_vfir), (copy_scanline), (dscaler_vfir_get_method):
* gst/deinterlace2/tvtime/x86-64_macros.inc:
Add a deinterlacer plugin based on the tvtime/DScaler deinterlacer, which
was relicensed to LGPL for GStreamer and in theory provides better and
faster results than the simple deinterlace element. Fixes bug #163578.
Ported to GStreamer 0.10, but not yet enabled or included in the build
system by default: a bug somewhere still causes bad artefacts, it can
currently only be built on x86/amd64, and it requires special CFLAGS.
This will be fixed soon.
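For testing once the plugin is enabled in the build, here is a minimal
sketch of a pipeline that would drive the new element (GStreamer 0.10
API). The element name "deinterlace2" and the caps filter are assumptions
based on this patch, not confirmed by it; the surrounding elements are
placeholders:

/* Minimal test pipeline for the new element (GStreamer 0.10 API).
 * Assumes the plugin is built and registered and that the element is
 * named "deinterlace2". It operates on packed 4:2:2 (YUY2), so we
 * convert on both sides of it. */
#include <gst/gst.h>

int
main (int argc, char *argv[])
{
  GstElement *pipeline;
  GMainLoop *loop;
  GError *error = NULL;

  gst_init (&argc, &argv);

  pipeline = gst_parse_launch ("videotestsrc ! ffmpegcolorspace ! "
      "video/x-raw-yuv,format=(fourcc)YUY2 ! deinterlace2 ! "
      "ffmpegcolorspace ! autovideosink", &error);
  if (pipeline == NULL) {
    g_printerr ("Failed to build pipeline: %s\n", error->message);
    g_error_free (error);
    return 1;
  }

  gst_element_set_state (pipeline, GST_STATE_PLAYING);
  loop = g_main_loop_new (NULL, FALSE);
  g_main_loop_run (loop);

  return 0;
}

The same pipeline string should work with gst-launch-0.10 as well.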
---
 .../tvtime/tomsmocomp/SearchLoop0A.inc             |  15 +
 .../tvtime/tomsmocomp/SearchLoopBottom.inc         | 116 ++++++++
 .../tvtime/tomsmocomp/SearchLoopEdgeA.inc          |  11 +
 .../tvtime/tomsmocomp/SearchLoopEdgeA8.inc         |  12 +
 .../tvtime/tomsmocomp/SearchLoopOddA.inc           |  10 +
 .../tvtime/tomsmocomp/SearchLoopOddA2.inc          |   5 +
 .../tvtime/tomsmocomp/SearchLoopOddA6.inc          |  11 +
 .../tvtime/tomsmocomp/SearchLoopOddAH.inc          |  10 +
 .../tvtime/tomsmocomp/SearchLoopOddAH2.inc         |   5 +
 .../tvtime/tomsmocomp/SearchLoopTop.inc            | 193 ++++++++++++
 .../tvtime/tomsmocomp/SearchLoopVA.inc             |   6 +
 .../tvtime/tomsmocomp/SearchLoopVAH.inc            |   6 +
 gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc  | 322 +++++++++++++++++++++
 .../tvtime/tomsmocomp/TomsMoCompAll.inc            | 220 ++++++++++++++
 .../tvtime/tomsmocomp/TomsMoCompAll2.inc           | 174 +++++++++++
 gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc    | 189 ++++++++++++
 16 files changed, 1305 insertions(+)
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/SearchLoop0A.inc
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA.inc
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA8.inc
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA.inc
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA2.inc
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA6.inc
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH.inc
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH2.inc
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVA.inc
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVAH.inc
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc
 create mode 100644 gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc

diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoop0A.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoop0A.inc
new file mode 100644
index 0000000..b1d9aec
--- /dev/null
+++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoop0A.inc
@@ -0,0 +1,15 @@
+// -*- c++ -*-
+
+// Searches just the center pixel, in both the old
+// and new fields, but takes averages. This is an even
+// pixel address. Any chroma match will be used. (YUY2)
+// We best like finding 0 motion so we will bias everything we found previously
+// up by a little, and adjust later
+
+#ifdef IS_SSE2
+        "paddusb "_ONES", %%xmm7\n\t"   // bias toward no motion
+#else
+        "paddusb "_ONES", %%mm7\n\t"    // bias toward no motion
+#endif
+
+        MERGE4PIXavg("(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")")   // center, in old and new

diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc
new file mode 100644
index 0000000..4b64b52
--- /dev/null
+++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc
@@ -0,0 +1,116 @@
+// -*- c++ -*-
+
+#ifdef IS_SSE2
+// sse2 code deleted for now
+#else
+
+// Version for non-SSE2
+
+#ifdef SKIP_SEARCH
+        "movq %%mm6, %%mm0\n\t"         // just use the results of our wierd bob
+#else
+
+        // JA 9/Dec/2002
+        // failed experiment
+        // but leave in placeholder for me to play about
+#ifdef DONT_USE_STRANGE_BOB
+        // Use the best weave if diffs less than 10 as that
+        // means the image is still or moving cleanly
+        // if there is motion we will clip which will catch anything
+        "psubusb "_FOURS", %%mm7\n\t"   // sets bits to zero if weave diff < 4
+        "pxor %%mm0, %%mm0\n\t"
+        "pcmpeqb %%mm0, %%mm7\n\t"      // all ff where weave better, else 00
+        "pcmpeqb %%mm7, %%mm0\n\t"      // all ff where bob better, else 00
+        "pand %%mm6, %%mm0\n\t"         // use bob for these pixel values
+        "pand %%mm5, %%mm7\n\t"         // use weave for these
+        "por %%mm7, %%mm0\n\t"          // combine both
+#else
+        // Use the better of bob or weave
+        //      pminub mm4, TENS        // the most we care about
+        V_PMINUB ("%%mm4", _TENS, "%%mm0")   // the most we care about
+
+        "psubusb %%mm4, %%mm7\n\t"      // forgive that much from weave est?
+        "psubusb "_FOURS", %%mm7\n\t"   // bias it a bit toward weave
+        "pxor %%mm0, %%mm0\n\t"
+        "pcmpeqb %%mm0, %%mm7\n\t"      // all ff where weave better, else 00
+        "pcmpeqb %%mm7, %%mm0\n\t"      // all ff where bob better, else 00
+        "pand %%mm6, %%mm0\n\t"         // use bob for these pixel values
+        "pand %%mm5, %%mm7\n\t"         // use weave for these
+        "por %%mm7, %%mm0\n\t"          // combine both
+#endif
+
+        //      pminub mm0, Max_Vals    // but clip to catch the stray error
+        V_PMINUB ("%%mm0", _Max_Vals, "%%mm1")   // but clip to catch the stray error
+        //      pmaxub mm0, Min_Vals
+        V_PMAXUB ("%%mm0", _Min_Vals)
+
+#endif
+
+        MOVX" "_pDest", %%"XAX"\n\t"
+
+#ifdef USE_VERTICAL_FILTER
+        "movq %%mm0, %%mm1\n\t"
+        //      pavgb mm0, qword ptr["XBX"]
+        V_PAVGB ("%%mm0", "(%%"XBX")", "%%mm2", _ShiftMask)
+        //      movntq qword ptr["XAX"+"XDX"], mm0
+        V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm0")
+        //      pavgb mm1, qword ptr["XBX"+"XCX"]
+        V_PAVGB ("%%mm1", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask)
+        "addq "_dst_pitchw", %%"XBX"\n\t"
+        //      movntq qword ptr["XAX"+"XDX"], mm1
+        V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm1")
+#else
+
+        //      movntq qword ptr["XAX"+"XDX"], mm0
+        V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm0")
+#endif
+
+        LEAX" 8(%%"XDX"), %%"XDX"\n\t"  // bump offset pointer
+        CMPX" "_Last8", %%"XDX"\n\t"    // done with line?
+ "jb 1b\n\t" // y +#endif + + MOVX" "_oldbx", %%"XBX"\n\t" + + : /* no outputs */ + + : "m"(pBob), + "m"(src_pitch2), + "m"(ShiftMask), + "m"(pDest), + "m"(dst_pitchw), + "m"(Last8), + "m"(pSrc), + "m"(pSrcP), + "m"(pBobP), + "m"(DiffThres), + "m"(Min_Vals), + "m"(Max_Vals), + "m"(FOURS), + "m"(TENS), + "m"(ONES), + "m"(UVMask), + "m"(Max_Mov), + "m"(YMask), + "m"(oldbx) + + : XAX, XCX, XDX, XSI, XDI, +#ifdef ARCH_386 + "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)", +#endif + "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", + "memory", "cc" + ); + + // adjust for next line + pSrc += src_pitch2; + pSrcP += src_pitch2; + pDest += dst_pitch2; + pBob += src_pitch2; + pBobP += src_pitch2; + } + + return 0; diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA.inc new file mode 100644 index 0000000..6208fe8 --- /dev/null +++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA.inc @@ -0,0 +1,11 @@ +// -*- c++ -*- + +// Searches 2 pixel to the left and right, in both the old +// and new fields, but takes averages. These are even +// pixel addresses. Chroma match will be used. (YUY2) + MERGE4PIXavg("-4(%%"XDI")", "4(%%"XSI", %%"XCX", 2)") // up left, down right + MERGE4PIXavg("4(%%"XDI")", "-4(%%"XSI", %%"XCX", 2)") // up right, down left + MERGE4PIXavg("-4(%%"XDI", %%"XCX")", "4(%%"XSI", %%"XCX")") // left, right + MERGE4PIXavg("4(%%"XDI", %%"XCX")", "-4(%%"XSI", %%"XCX")") // right, left + MERGE4PIXavg("-4(%%"XDI", %%"XCX", 2)", "4(%%"XSI")") // down left, up right + MERGE4PIXavg("4(%%"XDI", %%"XCX", 2)", "-4(%%"XSI")") // down right, up left diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA8.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA8.inc new file mode 100644 index 0000000..2841c3f --- /dev/null +++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA8.inc @@ -0,0 +1,12 @@ +// -*- c++ -*- + +// Searches 4 pixel to the left and right, in both the old +// and new fields, but takes averages. These are even +// pixel addresses. Chroma match will be used. (YUY2) + MERGE4PIXavg("-8(%%"XDI")", "8(%%"XSI", %%"XCX", 2)") // up left, down right + MERGE4PIXavg("8(%%"XDI")", "-8(%%"XSI", %%"XCX", 2)") // up right, down left + MERGE4PIXavg("-8(%%"XDI", %%"XCX")", "8(%%"XSI", %%"XCX")") // left, right + MERGE4PIXavg("8(%%"XDI", %%"XCX")", "-8(%%"XSI", %%"XCX")") // right, left + MERGE4PIXavg("-8(%%"XDI", %%"XCX", 2)", "8(%%"XSI")") // down left, up right + MERGE4PIXavg("8(%%"XDI", %%"XCX", 2)", "-8(%%"XSI")") // down right, up left + diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA.inc new file mode 100644 index 0000000..ab5375f --- /dev/null +++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA.inc @@ -0,0 +1,10 @@ +// -*- c++ -*- + +// Searches 1 pixel to the left and right, in both the old +// and new fields, but takes averages. These are odd +// pixel addresses. Any chroma match will not be used. 
(YUY2) + MERGE4PIXavg("-2(%%"XDI")", "2(%%"XSI", %%"XCX", 2)") // up left, down right + MERGE4PIXavg("2(%%"XDI")", "-2(%%"XSI", %%"XCX", 2)") // up right, down left + MERGE4PIXavg("-2(%%"XDI", %%"XCX", 2)", "2(%%"XSI")") // down left, up right + MERGE4PIXavg("2(%%"XDI", %%"XCX", 2)", "-2(%%"XSI")") // down right, up left +#include "SearchLoopOddA2.inc" diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA2.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA2.inc new file mode 100644 index 0000000..fd3f6fb --- /dev/null +++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA2.inc @@ -0,0 +1,5 @@ +// Searches 1 pixel to the left and right, in both the old +// and new fields, but takes averages. These are odd +// pixel addresses. Any chroma match will not be used. (YUY2) + MERGE4PIXavg("-2(%%"XDI", %%"XCX")", "2(%%"XSI", %%"XCX")") // left, right + MERGE4PIXavg("2(%%"XDI", %%"XCX")", "-2(%%"XSI", %%"XCX")") // right, left diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA6.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA6.inc new file mode 100644 index 0000000..cbae014 --- /dev/null +++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA6.inc @@ -0,0 +1,11 @@ +// -*- c++ -*- + +// Searches 3 pixels to the left and right, in both the old +// and new fields, but takes averages. These are odd +// pixel addresses. Any chroma match will not be used. (YUY2) + MERGE4PIXavg("-6(%%"XDI")", "6(%%"XSI", %%"XCX", 2)") // up left, down right + MERGE4PIXavg("6(%%"XDI")", "-6(%%"XSI", %%"XCX", 2)") // up right, down left + MERGE4PIXavg("-6(%%"XDI", %%"XCX")", "6(%%"XSI", %%"XCX")") // left, right + MERGE4PIXavg("6(%%"XDI", %%"XCX")", "-6(%%"XSI", %%"XCX")") // right, left + MERGE4PIXavg("-6(%%"XDI", %%"XCX", 2)", "6(%%"XSI")") // down left, up right + MERGE4PIXavg("6(%%"XDI", %%"XCX", 2)", "-6(%%"XSI")") // down right, up left diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH.inc new file mode 100644 index 0000000..e59e3c7 --- /dev/null +++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH.inc @@ -0,0 +1,10 @@ +// Searches 1 pixel to the left and right, in both the old +// and new fields, but takes v-half pel averages. These are odd +// pixel addresses. Any chroma match will not be used. (YUY2) + __asm + { + MERGE4PIXavgH("XDI"-2, "XDI"+"XCX"-2, "XSI"+"XCX"+2, "XSI"+2*"XCX"+2) // up left, down right + MERGE4PIXavgH("XDI"+2, "XDI"+"XCX"+2, "XSI"+"XCX"-2, "XSI"+2*"XCX"-2) // up right, down left + MERGE4PIXavgH("XDI"+2*"XCX"-2, "XDI"+"XCX"-2, "XSI"+"XCX"+2, "XSI"+2) // down left, up right + MERGE4PIXavgH("XDI"+2*"XCX"+2, "XDI"+"XCX"+2, "XSI"+"XCX"-2, "XSI"-2) // down right, up left + } diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH2.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH2.inc new file mode 100644 index 0000000..cd7d812 --- /dev/null +++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH2.inc @@ -0,0 +1,5 @@ +// Searches 1 pixel to the left and right, in both the old +// and new fields, but takes vertical averages. These are odd +// pixel addresses. Any chroma match will not be used. 
(YUY2)
+        MERGE4PIXavgH("-2(%%"XDI", %%"XCX")", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "2(%%"XSI", %%"XCX")")   // left, right
+        MERGE4PIXavgH("2(%%"XDI", %%"XCX")", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "-2(%%"XSI", %%"XCX")")   // right, left

diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc
new file mode 100644
index 0000000..7560f40
--- /dev/null
+++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc
@@ -0,0 +1,193 @@
+// -*- c++ -*-
+
+unsigned char* pDest;
+const unsigned char* pSrcP;
+const unsigned char* pSrc;
+const unsigned char* pBob;
+const unsigned char* pBobP;
+
+int64_t Max_Mov   = 0x0404040404040404ull;
+int64_t DiffThres = 0x0f0f0f0f0f0f0f0full;
+int64_t YMask     = 0x00ff00ff00ff00ffull;   // keeps only luma
+int64_t UVMask    = 0xff00ff00ff00ff00ull;   // keeps only chroma
+int64_t TENS      = 0x0a0a0a0a0a0a0a0aull;
+int64_t FOURS     = 0x0404040404040404ull;
+int64_t ONES      = 0x0101010101010101ull;
+int64_t Min_Vals  = 0x0000000000000000ull;
+int64_t Max_Vals  = 0x0000000000000000ull;
+int64_t ShiftMask = 0xfefffefffefffeffull;
+
+// long is int32 on ARCH_386, int64 on ARCH_AMD64. Declaring it this way
+// saves a lot of xor's to delete 64bit garbage.
+
+#if defined(DBL_RESIZE) || defined(USE_FOR_DSCALER)
+long src_pitch2 = src_pitch;     // even & odd lines are not interleaved in DScaler
+#else
+long src_pitch2 = 2 * src_pitch; // even & odd lines are interleaved in Avisynth
+#endif
+
+long dst_pitch2 = 2 * dst_pitch;
+long y;
+
+#ifdef IS_SSE2
+long Last8 = (rowsize - 16);     // ofs to last 16 bytes in row for SSE2
+#else
+long Last8 = (rowsize - 8);      // ofs to last 8 bytes in row
+#endif
+
+long dst_pitchw = dst_pitch;     // local store so the asm can reference it
+    pSrc  = pWeaveSrc;           // points 1 weave line above
+    pSrcP = pWeaveSrcP;          // "
+
+#ifdef DBL_RESIZE
+
+#ifdef USE_VERTICAL_FILTER
+    pDest = pWeaveDest + dst_pitch2;
+#else
+    pDest = pWeaveDest + 3*dst_pitch;
+#endif
+
+#else
+
+#ifdef USE_VERTICAL_FILTER
+    pDest = pWeaveDest + dst_pitch;
+#else
+    pDest = pWeaveDest + dst_pitch2;
+#endif
+
+#endif
+
+    if (TopFirst)
+    {
+        pBob  = pCopySrc + src_pitch2;   // remember one weave line just copied previously
+        pBobP = pCopySrcP + src_pitch2;
+    }
+    else
+    {
+        pBob  = pCopySrc;
+        pBobP = pCopySrcP;
+    }
+
+#ifndef _pBob
+#define _pBob        "%0"
+#define _src_pitch2  "%1"
+#define _ShiftMask   "%2"
+#define _pDest       "%3"
+#define _dst_pitchw  "%4"
+#define _Last8       "%5"
+#define _pSrc        "%6"
+#define _pSrcP       "%7"
+#define _pBobP       "%8"
+#define _DiffThres   "%9"
+#define _Min_Vals    "%10"
+#define _Max_Vals    "%11"
+#define _FOURS       "%12"
+#define _TENS        "%13"
+#define _ONES        "%14"
+#define _UVMask      "%15"
+#define _Max_Mov     "%16"
+#define _YMask       "%17"
+#define _oldbx       "%18"
+#endif
+
+    long oldbx;
+
+    for (y = 1; y < FldHeight - 1; y++)
+    {
+        // pretend it's indented -->>
+        __asm__ __volatile__
+        (
+         // Loop general reg usage
+         //
+         // XAX - pBobP, then pDest
+         // XBX - pBob
+         // XCX - src_pitch2
+         // XDX - current offset
+         // XDI - prev weave pixels, 1 line up
+         // XSI - next weave pixels, 1 line up
+
+         // Save "XBX" (-fPIC)
+         MOVX" %%"XBX", "_oldbx"\n\t"
+
+#ifdef IS_SSE2
+
+         // sse2 code deleted for now
+
+#else
+         // simple bob first 8 bytes
+         MOVX" "_pBob", %%"XBX"\n\t"
+         MOVX" "_src_pitch2", %%"XCX"\n\t"
+
+#ifdef USE_VERTICAL_FILTER
+         "movq (%%"XBX"), %%mm0\n\t"
+         "movq (%%"XBX", %%"XCX"), %%mm1\n\t"   //, qword ptr["XBX"+"XCX"]
+         "movq %%mm0, %%mm2\n\t"
+         V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask)   // halfway between
+         V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask)   // 1/4
way + V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask) // 3/4 way + MOVX" "_pDest", %%"XDI"\n\t" + MOVX" "_dst_pitchw", %%"XAX"\n\t" + V_MOVNTQ ("(%%"XDI")", "%%mm0") + V_MOVNTQ ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1 + + // simple bob last 8 bytes + MOVX" "_Last8", %%"XDX"\n\t" + LEAX" (%%"XBX", %%"XDX"), %%"XSI"\n\t" // ["XBX"+"XDX"] + "movq (%%"XSI"), %%mm0\n\t" + "movq (%%"XSI", %%"XCX"), %%mm1\n\t" // qword ptr["XSI"+"XCX"] + "movq %%mm0, %%mm2\n\t" + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // halfway between + V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask) // 1/4 way + V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask) // 3/4 way + ADDX" %%"XDX", %%"XDI"\n\t" // last 8 bytes of dest + V_MOVNTQ ("%%"XDI"", "%%mm0") + V_MOVNTQ ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1) + +#else + "movq (%%"XBX"), %%mm0\n\t" + // pavgb mm0, qword ptr["XBX"+"XCX"] + V_PAVGB ("%%mm0", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XBX"+"XCX"], mm2, ShiftMask) + MOVX" "_pDest", %%"XDI"\n\t" + V_MOVNTQ ("(%%"XDI")", "%%mm0") + + // simple bob last 8 bytes + MOVX" "_Last8", %%"XDX"\n\t" + LEAX" (%%"XBX", %%"XDX"), %%"XSI"\n\t" //"XSI", ["XBX"+"XDX"] + "movq (%%"XSI"), %%mm0\n\t" + // pavgb mm0, qword ptr["XSI"+"XCX"] + V_PAVGB ("%%mm0", "(%%"XSI", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XSI"+"XCX"], mm2, ShiftMask) + V_MOVNTQ ("(%%"XDI", %%"XDX")", "%%mm0") // qword ptr["XDI"+"XDX"], mm0) +#endif + // now loop and get the middle qwords + MOVX" "_pSrc", %%"XSI"\n\t" + MOVX" "_pSrcP", %%"XDI"\n\t" + MOVX" $8, %%"XDX"\n\t" // curr offset longo all lines + + "1:\n\t" + MOVX" "_pBobP", %%"XAX"\n\t" + ADDX" $8, %%"XDI"\n\t" + ADDX" $8, %%"XSI"\n\t" + ADDX" $8, %%"XBX"\n\t" + ADDX" %%"XDX", %%"XAX"\n\t" + +#ifdef USE_STRANGE_BOB +#include "StrangeBob.inc" +#else +#include "WierdBob.inc" +#endif + + // For non-SSE2: + // through out most of the rest of this loop we will maintain + // mm4 our min bob value + // mm5 best weave pixels so far + // mm6 our max Bob value + // mm7 best weighted pixel ratings so far + + // We will keep a slight bias to using the weave pixels + // from the current location, by rating them by the min distance + // from the Bob value instead of the avg distance from that value. + // our best and only rating so far + "pcmpeqb %%mm7, %%mm7\n\t" // ffff, say we didn't find anything good yet + +#endif diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVA.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVA.inc new file mode 100644 index 0000000..3e3d19b --- /dev/null +++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVA.inc @@ -0,0 +1,6 @@ +// -*- c++ -*- + +// Searches the center vertical line above center and below, in both the old +// and new fields, but takes averages. These are even pixel addresses. + MERGE4PIXavg("(%%"XDI", %%"XCX", 2)", "(%%"XSI")") // down, up + MERGE4PIXavg("(%%"XDI")", "(%%"XSI", %%"XCX", 2)") // up, down diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVAH.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVAH.inc new file mode 100644 index 0000000..33155bc --- /dev/null +++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVAH.inc @@ -0,0 +1,6 @@ +// -*- c++ -*- + +// Searches the center vertical line above center and below, in both the old +// and new fields, but takes averages. These are even pixel addresses. 
+ MERGE4PIXavgH("(%%"XDI", %%"XCX", 2)", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "(%%"XSI")") // down, up + MERGE4PIXavgH("(%%"XDI")", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "(%%"XSI", %%"XCX", 2)") // up, down diff --git a/gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc b/gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc new file mode 100644 index 0000000..c1d2b5b --- /dev/null +++ b/gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc @@ -0,0 +1,322 @@ +// -*- c++ -*- + + // First, get and save our possible Bob values + // Assume our pixels are layed out as follows with x the calc'd bob value + // and the other pixels are from the current field + // + // j a b c k current field + // x calculated line + // m d e f n current field + // + // we calc the bob value luma value as: + // if |j - n| < Thres && |a - m| > Thres + // avg(j,n) + // end if + // if |k - m| < Thres && |c - n| > Thres + // avg(k,m) + // end if + // if |c - d| < Thres && |b - f| > Thres + // avg(c,d) + // end if + // if |a - f| < Thres && |b - d| > Thres + // avg(a,f) + // end if + // if |b - e| < Thres + // avg(b,e) + // end if + // pickup any thing not yet set with avg(b,e) + + // j, n + "pxor %%mm5, %%mm5\n\t" + "pxor %%mm6, %%mm6\n\t" + "pxor %%mm7, %%mm7\n\t" + + "movq -2(%%"XBX"), %%mm0\n\t" // value a from top left + "movq -4(%%"XBX", %%"XCX"), %%mm1\n\t" // value m from bottom right + + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(a,m) + + "psubusb "_DiffThres", %%mm3\n\t" // nonzero where abs(a,m) > Thres else 0 + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where abs(a,m) < Thres, else 00 + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where abs(a,m) > Thres, else 00 + + + "movq -4(%%"XBX"), %%mm0\n\t" // value j + "movq 4(%%"XBX", %%"XCX"), %%mm1\n\t" // value n + "movq %%mm0, %%mm2\n\t" + "pavgb %%mm1, %%mm2\n\t" // avg(j,n) + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm0\n\t" + "psubusb %%mm3, %%mm1\n\t" + "por %%mm1, %%mm0\n\t" // abs(j,n) + + "movq %%mm0, %%mm1\n\t" + "psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(j,n) > Thres else 0 + "pxor %%mm3, %%mm3\n\t" + "pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(j,n) < Thres, else 00 + + "pand %%mm4, %%mm1\n\t" + + "pand %%mm1, %%mm2\n\t" + "pand %%mm1, %%mm0\n\t" + + "movq %%mm1, %%mm3\n\t" + "pxor %%mm5, %%mm3\n\t" + "pand %%mm3, %%mm6\n\t" + "pand %%mm3, %%mm7\n\t" + "pand %%mm3, %%mm5\n\t" + + "por %%mm1, %%mm5\n\t" + "por %%mm2, %%mm6\n\t" + "por %%mm0, %%mm7\n\t" + + // k & m + "movq 2(%%"XBX"), %%mm0\n\t" // value c from top left + "movq 4(%%"XBX", %%"XCX"), %%mm1\n\t" // value n from bottom right + + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(c,n) + + "psubusb "_DiffThres", %%mm3\n\t" // nonzero where abs(c,n) > Thres else 0 + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where abs(c,n) < Thres, else 00 + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where abs(c,n) > Thres, else 00 + + + "movq 4(%%"XBX"), %%mm0\n\t" // value k + "movq -4(%%"XBX", %%"XCX"), %%mm1\n\t" // value m + "movq %%mm0, %%mm2\n\t" + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(k,m) + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm0\n\t" + "psubusb %%mm3, %%mm1\n\t" + "por %%mm1, %%mm0\n\t" // abs(k,m) + + "movq %%mm0, %%mm1\n\t" + "psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(k,m) > Thres else 0 + "pxor %%mm3, %%mm3\n\t" + "pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(k,m) < Thres, else 00 
+ + "pand %%mm4, %%mm1\n\t" + + "pand %%mm1, %%mm2\n\t" + "pand %%mm1, %%mm0\n\t" + + "movq %%mm1, %%mm3\n\t" + "pxor %%mm5, %%mm3\n\t" + "pand %%mm3, %%mm6\n\t" + "pand %%mm3, %%mm7\n\t" + "pand %%mm3, %%mm5\n\t" + + "por %%mm1, %%mm5\n\t" + "por %%mm2, %%mm6\n\t" + "por %%mm0, %%mm7\n\t" + + + // c & d + "movq (%%"XBX"), %%mm0\n\t" // value b from top left + "movq 2(%%"XBX", %%"XCX"), %%mm1\n\t" // value f from bottom right + + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(b,f) + + "psubusb "_DiffThres", %%mm3\n\t" // nonzero where abs(b,f) > Thres else 0 + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where abs(b,f) < Thres, else 00 + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where abs(b,f) > Thres, else 00 + + "movq 2(%%"XBX"), %%mm0\n\t" // value c + "movq -2(%%"XBX", %%"XCX"), %%mm1\n\t" // value d + "movq %%mm0, %%mm2\n\t" + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(c,d) + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm0\n\t" + "psubusb %%mm3, %%mm1\n\t" + "por %%mm1, %%mm0\n\t" // abs(c,d) + + "movq %%mm0, %%mm1\n\t" + "psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(c,d) > Thres else 0 + "pxor %%mm3, %%mm3\n\t" + "pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(c,d) < Thres, else 00 + + "pand %%mm4, %%mm1\n\t" + + "pand %%mm1, %%mm2\n\t" + "pand %%mm1, %%mm0\n\t" + + "movq %%mm1, %%mm3\n\t" + "pxor %%mm5, %%mm3\n\t" + "pand %%mm3, %%mm6\n\t" + "pand %%mm3, %%mm7\n\t" + "pand %%mm3, %%mm5\n\t" + + "por %%mm1, %%mm5\n\t" + "por %%mm2, %%mm6\n\t" + "por %%mm0, %%mm7\n\t" + + // a & f + "movq (%%"XBX"), %%mm0\n\t" // value b from top left + "movq -2(%%"XBX", %%"XCX"), %%mm1\n\t" // value d from bottom right + + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(b,d) + + "psubusb "_DiffThres", %%mm3\n\t" // nonzero where abs(b,d) > Thres else 0 + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where abs(b,d) < Thres, else 00 + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where abs(b,d) > Thres, else 00 + + "movq -2(%%"XBX"), %%mm0\n\t" // value a + "movq 2(%%"XBX", %%"XCX"), %%mm1\n\t" // value f + "movq %%mm0, %%mm2\n\t" + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(a,f) + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm0\n\t" + "psubusb %%mm3, %%mm1\n\t" + "por %%mm1, %%mm0\n\t" // abs(a,f) + + "movq %%mm0, %%mm1\n\t" + "psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(a,f) > Thres else 0 + "pxor %%mm3, %%mm3\n\t" + "pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(a,f) < Thres, else 00 + + "pand %%mm4, %%mm1\n\t" + + "pand %%mm1, %%mm2\n\t" + "pand %%mm1, %%mm0\n\t" + + "movq %%mm1, %%mm3\n\t" + "pxor %%mm5, %%mm3\n\t" + "pand %%mm3, %%mm6\n\t" + "pand %%mm3, %%mm7\n\t" + "pand %%mm3, %%mm5\n\t" + + "por %%mm1, %%mm5\n\t" + "por %%mm2, %%mm6\n\t" + "por %%mm0, %%mm7\n\t" + + "pand "_YMask", %%mm5\n\t" // mask out chroma from here + "pand "_YMask", %%mm6\n\t" // mask out chroma from here + "pand "_YMask", %%mm7\n\t" // mask out chroma from here + + // b,e + "movq (%%"XBX"), %%mm0\n\t" // value b from top + "movq (%%"XBX", %%"XCX"), %%mm1\n\t" // value e from bottom + "movq %%mm0, %%mm2\n\t" + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(b,e) + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm0\n\t" + "psubusb %%mm3, %%mm1\n\t" + "por %%mm1, %%mm0\n\t" // abs(b,e) + + "movq %%mm0, %%mm1\n\t" + "psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(b,e) > Thres else 0 + "pxor %%mm3, %%mm3\n\t" + 
"pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(b,e) < Thres, else 00 + + "pand %%mm1, %%mm2\n\t" + "pand %%mm1, %%mm0\n\t" + + "movq %%mm1, %%mm3\n\t" + "pxor %%mm5, %%mm3\n\t" + "pand %%mm3, %%mm6\n\t" + "pand %%mm3, %%mm7\n\t" + "pand %%mm3, %%mm5\n\t" + + "por %%mm1, %%mm5\n\t" + "por %%mm2, %%mm6\n\t" + "por %%mm0, %%mm7\n\t" + + // bob in any leftovers + "movq (%%"XBX"), %%mm0\n\t" // value b from top + "movq (%%"XBX", %%"XCX"), %%mm1\n\t" // value e from bottom + + +// We will also calc here the max/min values to later limit comb +// so the max excursion will not exceed the Max_Comb constant + +#ifdef SKIP_SEARCH + "movq %%mm0, %%mm2\n\t" +// pminub %%mm2, %%mm1 + V_PMINUB ("%%mm2", "%%mm1", "%%mm4") + +// pmaxub %%mm6, %%mm2 // clip our current results so far to be above this + V_PMAXUB ("%%mm6", "%%mm2") + "movq %%mm0, %%mm2\n\t" + V_PMAXUB ("%%mm2", "%%mm1") +// pminub %%mm6, %%mm2 // clip our current results so far to be below this + V_PMINUB ("%%mm6", "%%mm2", "%%mm4") + +#else + "movq %%mm0, %%mm2\n\t" + "movq (%%"XAX"), %%mm4\n\t" + "psubusb %%mm4, %%mm2\n\t" + "psubusb %%mm0, %%mm4\n\t" + "por %%mm2, %%mm4\n\t" // abs diff + + "movq %%mm1, %%mm2\n\t" + "movq (%%"XAX", %%"XCX"), %%mm3\n\t" + "psubusb %%mm3, %%mm2\n\t" + "psubusb %%mm1, %%mm3\n\t" + "por %%mm2, %%mm3\n\t" // abs diff +// pmaxub %%mm3, %%mm4 // top or bottom pixel moved most + V_PMAXUB ("%%mm3", "%%mm4") // top or bottom pixel moved most + "psubusb "_DiffThres", %%mm3\n\t" // moved more than allowed? or goes to 0? + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where low motion, else high motion + + "movq %%mm0, %%mm2\n\t" +// pminub %%mm2, %%mm1 + V_PMINUB ("%%mm2", "%%mm1", "%%mm4") + +// pmaxub %%mm6, %%mm2 // clip our current results so far to be above this + V_PMAXUB ("%%mm6", "%%mm2") + + "psubusb %%mm3, %%mm2\n\t" // maybe decrease it to 0000.. if no surround motion + "movq %%mm2, "_Min_Vals"\n\t" + + "movq %%mm0, %%mm2\n\t" + V_PMAXUB ("%%mm2", "%%mm1") +// pminub %%mm6, %%mm2 // clip our current results so far to be below this + V_PMINUB ("%%mm6", "%%mm2", "%%mm4") + "paddusb %%mm3, %%mm2\n\t" // maybe increase it to ffffff if no surround motion + "movq %%mm2, "_Max_Vals"\n\t" +#endif + + "movq %%mm0, %%mm2\n\t" +// pavgb %%mm2, %%mm1 // avg(b,e) + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(b,e) + + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(b,e) + "movq %%mm3, %%mm1\n\t" // keep copy of diffs + + "pxor %%mm4, %%mm4\n\t" + "psubusb %%mm7, %%mm3\n\t" // nonzero where new weights bigger, else 0 + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where new better, else 00 + "pcmpeqb %%mm0, %%mm0\n\t" + "pandn %%mm0, %%mm5\n\t" + "por %%mm5, %%mm3\n\t" + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where old better, else 00 + + "pand %%mm3, %%mm1\n\t" + "pand %%mm3, %%mm2\n\t" + + "pand %%mm4, %%mm6\n\t" + "pand %%mm4, %%mm7\n\t" + + "por %%mm2, %%mm6\n\t" // our x2 value + "por %%mm1, %%mm7\n\t" // our x2 diffs + "movq %%mm7, %%mm4\n\t" // save as bob uncertainty indicator diff --git a/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc b/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc new file mode 100644 index 0000000..f046f5e --- /dev/null +++ b/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc @@ -0,0 +1,220 @@ +/* + * GStreamer + * Copyright (c) 2002 Tom Barry All rights reserved. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* + * Relicensed for GStreamer from GPL to LGPL with permit from Tom Barry. + * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578 + */ + + +#ifndef TopFirst +#define TopFirst IsOdd +#endif + +#ifdef SEFUNC +#undef SEFUNC +#endif + +#if defined(IS_SSE) +#define SEFUNC(x) Search_Effort_SSE_##x() +#elif defined(IS_3DNOW) +#define SEFUNC(x) Search_Effort_3DNOW_##x() +#else +#define SEFUNC(x) Search_Effort_MMX_##x() +#endif + +#include "TomsMoCompAll2.inc" + +#define USE_STRANGE_BOB + +#include "TomsMoCompAll2.inc" + +#undef USE_STRANGE_BOB + + +void FUNCT_NAME(GstDeinterlace2* object) +{ + pMyMemcpy = object->pMemcpy; + + /* double stride do address just every odd/even scanline */ + src_pitch = object->field_stride; + dst_pitch = object->output_stride; + rowsize = object->line_length; + FldHeight = object->field_height; + + pCopySrc = GST_BUFFER_DATA(object->field_history[object->history_count-1].buf); + pCopySrcP = GST_BUFFER_DATA(object->field_history[object->history_count-3].buf); + pWeaveSrc = GST_BUFFER_DATA(object->field_history[object->history_count-2].buf); + pWeaveSrcP = GST_BUFFER_DATA(object->field_history[object->history_count-4].buf); + + /* use bottom field and interlace top field */ + if (object->field_history[object->history_count-2].flags == PICTURE_INTERLACED_BOTTOM) { + IsOdd = 1; + + // if we have an odd field we copy an even field and weave an odd field + pCopyDest = GST_BUFFER_DATA(object->out_buf); + pWeaveDest = pCopyDest + dst_pitch; + } + /* do it vice verca */ + else { + + IsOdd = 0; + // if we have an even field we copy an odd field and weave an even field + pCopyDest = GST_BUFFER_DATA(object->out_buf) + dst_pitch; + pWeaveDest = GST_BUFFER_DATA(object->out_buf); + } + + +#ifdef IS_SSE2 + // SSE2 support temporarily deleted +#endif + + // copy 1st and last weave lines + Fieldcopy(pWeaveDest, pCopySrc, rowsize, + 1, dst_pitch*2, src_pitch); + Fieldcopy(pWeaveDest+(FldHeight-1)*dst_pitch*2, + pCopySrc+(FldHeight-1)*src_pitch, rowsize, + 1, dst_pitch*2, src_pitch); + +#ifdef USE_VERTICAL_FILTER + // Vertical Filter currently not implemented for DScaler !! 
+ // copy 1st and last lines the copy field + Fieldcopy(pCopyDest, pCopySrc, rowsize, + 1, dst_pitch*2, src_pitch); + Fieldcopy(pCopyDest+(FldHeight-1)*dst_pitch*2, + pCopySrc+(FldHeight-1)*src_pitch, rowsize, + 1, dst_pitch*2, src_pitch); +#else + + // copy all of the copy field + Fieldcopy(pCopyDest, pCopySrc, rowsize, + FldHeight, dst_pitch*2, src_pitch); +#endif + // then go fill in the hard part, being variously lazy depending upon + // SearchEffort + + if(!UseStrangeBob) { + if (SearchEffort == 0) + { + SEFUNC(0); + } + else if (SearchEffort <= 1) + { + SEFUNC(1); + } + /* else if (SearchEffort <= 2) + { + SEFUNC(2); + } + */ + else if (SearchEffort <= 3) + { + SEFUNC(3); + } + else if (SearchEffort <= 5) + { + SEFUNC(5); + } + else if (SearchEffort <= 9) + { + SEFUNC(9); + } + else if (SearchEffort <= 11) + { + SEFUNC(11); + } + else if (SearchEffort <= 13) + { + SEFUNC(13); + } + else if (SearchEffort <= 15) + { + SEFUNC(15); + } + else if (SearchEffort <= 19) + { + SEFUNC(19); + } + else if (SearchEffort <= 21) + { + SEFUNC(21); + } + else + { + SEFUNC(Max); + } + } + else + { + if (SearchEffort == 0) + { + SEFUNC(0_SB); + } + else if (SearchEffort <= 1) + { + SEFUNC(1_SB); + } + /* else if (SearchEffort <= 2) + { + SEFUNC(2_SB); + } + */ + else if (SearchEffort <= 3) + { + SEFUNC(3_SB); + } + else if (SearchEffort <= 5) + { + SEFUNC(5_SB); + } + else if (SearchEffort <= 9) + { + SEFUNC(9_SB); + } + else if (SearchEffort <= 11) + { + SEFUNC(11_SB); + } + else if (SearchEffort <= 13) + { + SEFUNC(13_SB); + } + else if (SearchEffort <= 15) + { + SEFUNC(15_SB); + } + else if (SearchEffort <= 19) + { + SEFUNC(19_SB); + } + else if (SearchEffort <= 21) + { + SEFUNC(21_SB); + } + else + { + SEFUNC(Max_SB); + } + } + +#ifdef ARCH_386 + __asm__ __volatile__("emms"); +#endif +} diff --git a/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc b/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc new file mode 100644 index 0000000..baf1a1a --- /dev/null +++ b/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc @@ -0,0 +1,174 @@ +// -*- c++ -*- + +#ifdef SEARCH_EFFORT_FUNC +#undef SEARCH_EFFORT_FUNC +#endif + +#ifdef USE_STRANGE_BOB +#define SEARCH_EFFORT_FUNC(n) SEFUNC(n##_SB) +#else +#define SEARCH_EFFORT_FUNC(n) SEFUNC(n) +#endif + +int SEARCH_EFFORT_FUNC(0) // we don't try at all ;-) +{ + //see Search_Effort_Max() for comments +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +} + +int SEARCH_EFFORT_FUNC(1) +{ + //see Search_Effort_Max() for comments +#include "SearchLoopTop.inc" + RESET_CHROMA // pretend chroma diffs was 255 each +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +} + +int SEARCH_EFFORT_FUNC(3) +{ + //see Search_Effort_Max() for comments +#include "SearchLoopTop.inc" +#include "SearchLoopOddA2.inc" + RESET_CHROMA // pretend chroma diffs was 255 each +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +} + +int SEARCH_EFFORT_FUNC(5) +{ + //see Search_Effort_Max() for comments +#include "SearchLoopTop.inc" +#include "SearchLoopOddA2.inc" +#include "SearchLoopOddAH2.inc" + RESET_CHROMA // pretend chroma diffs was 255 each +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +} + +// 3x3 search +int SEARCH_EFFORT_FUNC(9) +{ + //see SearchEffortMax() for comments +#include "SearchLoopTop.inc" +#include "SearchLoopOddA.inc" + RESET_CHROMA // pretend chroma diffs was 255 each +#include "SearchLoopVA.inc" +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +} + +// Search 9 with 2 
H-half pels added +int SEARCH_EFFORT_FUNC(11) +{ + //see SearchEffortMax() for comments +#include "SearchLoopTop.inc" +#include "SearchLoopOddA.inc" +#include "SearchLoopOddAH2.inc" + RESET_CHROMA // pretend chroma diffs was 255 each +#include "SearchLoopVA.inc" +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +} + +// Search 11 with 2 V-half pels added +int SEARCH_EFFORT_FUNC(13) +{ + //see SearchEffortMax() for comments +#include "SearchLoopTop.inc" +#include "SearchLoopOddA.inc" +#include "SearchLoopOddAH2.inc" + RESET_CHROMA // pretend chroma diffs was 255 each +#include "SearchLoopVAH.inc" +#include "SearchLoopVA.inc" +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +} + +// 5x3 +int SEARCH_EFFORT_FUNC(15) +{ + //see SearchEffortMax() for comments +#include "SearchLoopTop.inc" +#include "SearchLoopOddA.inc" + RESET_CHROMA // pretend chroma diffs was 255 each +#include "SearchLoopEdgeA.inc" +#include "SearchLoopVA.inc" +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +} + +// 5x3 + 4 half pels +int SEARCH_EFFORT_FUNC(19) +{ + //see SearchEffortMax() for comments +#include "SearchLoopTop.inc" +#include "SearchLoopOddA.inc" +#include "SearchLoopOddAH2.inc" + RESET_CHROMA // pretend chroma diffs was 255 each +#include "SearchLoopEdgeA.inc" +#include "SearchLoopVAH.inc" +#include "SearchLoopVA.inc" +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +} + +// Handle one 4x1 block of pixels +// Search a 7x3 area, no half pels + +int SEARCH_EFFORT_FUNC(21) +{ + //see SearchLoopTop.inc for comments +#include "SearchLoopTop.inc" + + // odd addresses -- the pixels at odd address wouldn't generate + // good luma values but we will mask those off + +#include "SearchLoopOddA6.inc" // 4 odd v half pels, 3 to left & right +#include "SearchLoopOddA.inc" // 6 odd pels, 1 to left & right + + RESET_CHROMA // pretend chroma diffs was 255 each + + // even addresses -- use both luma and chroma from these + // search averages of 2 pixels left and right +#include "SearchLoopEdgeA.inc" + // search vertical line and averages, -1,0,+1 +#include "SearchLoopVA.inc" + // blend our results and loop +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +} + +// Handle one 4x1 block of pixels +// Search a 9x3 area, no half pels +int SEARCH_EFFORT_FUNC(Max) +{ + //see SearchLoopTop.inc for comments +#include "SearchLoopTop.inc" + + // odd addresses -- the pixels at odd address wouldn't generate + // good luma values but we will mask those off + +#include "SearchLoopOddA6.inc" // 4 odd v half pels, 3 to left & right +#include "SearchLoopOddA.inc" // 6 odd pels, 1 to left & right + + RESET_CHROMA // pretend chroma diffs was 255 each + + // even addresses -- use both luma and chroma from these + // search averages of 4 pixels left and right +#include "SearchLoopEdgeA8.inc" + // search averages of 2 pixels left and right +#include "SearchLoopEdgeA.inc" + // search vertical line and averages, -1,0,+1 +#include "SearchLoopVA.inc" + // blend our results and loop +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +} + +#undef SEARCH_EFFORT_FUNC + + diff --git a/gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc b/gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc new file mode 100644 index 0000000..36fd9d2 --- /dev/null +++ b/gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc @@ -0,0 +1,189 @@ +// -*- c++ -*- + + // First, get and save our possible Bob values + // Assume our pixels are layed out as follows with x the calc'd bob value + // and the other pixels are from 
the current field + // + // j a b c k current field + // x calculated line + // m d e f n current field + // + // we calc the bob value as: + // x2 = either avg(a,f), avg(c,d), avg(b,e), avg(j,n), or avg(k,m) + + // selected for the smallest of abs(a,f), abs(c,d), or abs(b,e), etc. + + // a,f + "movq -2(%%"XBX"), %%mm0\n\t" // value a from top left + "movq 2(%%"XBX", %%"XCX"), %%mm1\n\t" // value f from bottom right + "movq %%mm0, %%mm6\n\t" +// pavgb %%mm6, %%mm1 // avg(a,f), also best so far + V_PAVGB ("%%mm6", "%%mm1", "%%mm7", _ShiftMask) // avg(a,f), also best so far + "movq %%mm0, %%mm7\n\t" + "psubusb %%mm1, %%mm7\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm7\n\t" // abs diff, also best so far + + // c,d + "movq 2(%%"XBX"), %%mm0\n\t" // value a from top left + "movq -2(%%"XBX", %%"XCX"), %%mm1\n\t" // value f from bottom right + "movq %%mm0, %%mm2\n\t" +// pavgb %%mm2, %%mm1 // avg(c,d) + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(c,d) + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(c,d) + "movq %%mm3, %%mm1\n\t" // keep copy + + "psubusb %%mm7, %%mm3\n\t" // nonzero where new weights bigger, else 0 + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where new better, else 00 + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where old better, else 00 + + "pand %%mm3, %%mm1\n\t" // keep only better new avg and abs + "pand %%mm3, %%mm2\n\t" + + "pand %%mm4, %%mm6\n\t" + "pand %%mm4, %%mm7\n\t" + + "por %%mm2, %%mm6\n\t" // and merge new & old vals keeping best + "por %%mm1, %%mm7\n\t" + "por "_UVMask", %%mm7\n\t" // but we know chroma is worthless so far + "pand "_YMask", %%mm5\n\t" // mask out chroma from here also + + // j,n + "movq -4(%%"XBX"), %%mm0\n\t" // value j from top left + "movq 4(%%"XBX", %%"XCX"), %%mm1\n\t" // value n from bottom right + "movq %%mm0, %%mm2\n\t" +// pavgb %%mm2, %%mm1 // avg(j,n) + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(j,n) + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(j-n) + "movq %%mm3, %%mm1\n\t" // keep copy + + "psubusb %%mm7, %%mm3\n\t" // nonzero where new weights bigger, else 0 + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where new better, else 00 + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where old better, else 00 + + "pand %%mm3, %%mm1\n\t" // keep only better new avg and abs + "pand %%mm2, %%mm3\n\t" + + "pand %%mm4, %%mm6\n\t" + "pand %%mm4, %%mm7\n\t" + + "por %%mm3, %%mm6\n\t" // and merge new & old vals keeping best + "por %%mm1, %%mm7\n\t" // " + + // k, m + "movq 4(%%"XBX"), %%mm0\n\t" // value k from top right + "movq -4(%%"XBX", %%"XCX"), %%mm1\n\t" // value n from bottom left + "movq %%mm0, %%mm4\n\t" +// pavgb %%mm4, %%mm1 // avg(k,m) + V_PAVGB ("%%mm4", "%%mm1", "%%mm3", _ShiftMask) // avg(k,m) + + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(k,m) + "movq %%mm3, %%mm1\n\t" // keep copy + + "movq %%mm4, %%mm2\n\t" // avg(k,m) + + "psubusb %%mm7, %%mm3\n\t" // nonzero where new weights bigger, else 0 + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where new better, else 00 + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where old better, else 00 + + "pand %%mm3, %%mm1\n\t" // keep only better new avg and abs + "pand %%mm2, %%mm3\n\t" + + "pand %%mm4, %%mm6\n\t" + "pand %%mm4, %%mm7\n\t" + + "por %%mm3, %%mm6\n\t" // and merge new & old vals keeping best + "por 
%%mm1, %%mm7\n\t" // " + + // b,e + "movq (%%"XBX"), %%mm0\n\t" // value b from top + "movq (%%"XBX", %%"XCX"), %%mm1\n\t" // value e from bottom + +// We will also calc here the max/min values to later limit comb +// so the max excursion will not exceed the Max_Comb constant + +#ifdef SKIP_SEARCH + "movq %%mm0, %%mm2\n\t" +// pminub %%mm2, %%mm1 + V_PMINUB ("%%mm2", "%%mm1", "%%mm4") + +// pmaxub %%mm6, %%mm2 // clip our current results so far to be above this + V_PMAXUB ("%%mm6", "%%mm2") + "movq %%mm0, %%mm2\n\t" + V_PMAXUB ("%%mm2", "%%mm1") +// pminub %%mm6, %%mm2 // clip our current results so far to be below this + V_PMINUB ("%%mm6", "%%mm2", "%%mm4") + +#else + "movq %%mm0, %%mm2\n\t" + "movq (%%"XAX"), %%mm4\n\t" + "psubusb %%mm4, %%mm2\n\t" + "psubusb %%mm0, %%mm4\n\t" + "por %%mm2, %%mm4\n\t" // abs diff + + "movq %%mm1, %%mm2\n\t" + "movq (%%"XAX", %%"XCX"), %%mm3\n\t" + "psubusb %%mm3, %%mm2\n\t" + "psubusb %%mm1, %%mm3\n\t" + "por %%mm2, %%mm3\n\t" // abs diff +// pmaxub %%mm3, %%mm4 // top or bottom pixel moved most + V_PMAXUB ("%%mm3", "%%mm4") // top or bottom pixel moved most + "psubusb "_Max_Mov", %%mm3\n\t" // moved more than allowed? or goes to 0? + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where low motion, else high motion + + "movq %%mm0, %%mm2\n\t" +// pminub %%mm2, %%mm1 + V_PMINUB ("%%mm2", "%%mm1", "%%mm4") + +// pmaxub %%mm6, %%mm2 // clip our current results so far to be above this + V_PMAXUB ("%%mm6", "%%mm2") + + "psubusb %%mm3, %%mm2\n\t" // maybe decrease it to 0000.. if no surround motion + "movq %%mm2, "_Min_Vals"\n\t" + + "movq %%mm0, %%mm2\n\t" + V_PMAXUB ("%%mm2", "%%mm1") +// pminub %%mm6, %%mm2 // clip our current results so far to be below this + V_PMINUB ("%%mm6", "%%mm2", "%%mm4") + "paddusb %%mm3, %%mm2\n\t" // maybe increase it to ffffff if no surround motion + "movq %%mm2, "_Max_Vals"\n\t" +#endif + + "movq %%mm0, %%mm2\n\t" +// pavgb %%mm2, %%mm1 // avg(b,e) + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(b,e) + + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(c,d) + "movq %%mm3, %%mm1\n\t" // keep copy of diffs + + "pxor %%mm4, %%mm4\n\t" + "psubusb %%mm7, %%mm3\n\t" // nonzero where new weights bigger, else 0 + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where new better, else 00 + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where old better, else 00 + + "pand %%mm3, %%mm1\n\t" + "pand %%mm3, %%mm2\n\t" + + "pand %%mm4, %%mm6\n\t" + "pand %%mm4, %%mm7\n\t" + + "por %%mm2, %%mm6\n\t" // our x2 value + "por %%mm1, %%mm7\n\t" // our x2 diffs + "movq %%mm7, %%mm4\n\t" // save as bob uncertainty indicator + -- 2.7.4
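Note: the per-pixel bob selection that StrangeBob.inc implements in MMX,
eight pixels at a time, is much easier to follow in scalar form. The
sketch below is illustrative only and not part of the patch: avg(),
absdiff() and strange_bob_luma() are hypothetical names, pavgb actually
rounds up rather than truncating, and the real code accumulates candidate
averages and confidence masks instead of returning the first match.

/* Scalar sketch of the StrangeBob luma decision (illustrative only).
 * Pixel layout around the missing pixel x, as in the StrangeBob.inc
 * comments:
 *
 *      j a b c k    current field
 *            x      calculated line
 *      m d e f n    current field
 */
static unsigned char
avg (unsigned char p, unsigned char q)
{
  return (unsigned char) ((p + q) / 2);   /* roughly what pavgb does per byte */
}

static int
absdiff (unsigned char p, unsigned char q)
{
  return p > q ? p - q : q - p;   /* psubusb both ways + por, per byte */
}

static unsigned char
strange_bob_luma (unsigned char j, unsigned char a, unsigned char b,
    unsigned char c, unsigned char k, unsigned char m, unsigned char d,
    unsigned char e, unsigned char f, unsigned char n, int thres)
{
  /* average along a diagonal only where that diagonal is smooth while the
   * crossing diagonal shows an edge, i.e. a likely slanted line through x */
  if (absdiff (j, n) < thres && absdiff (a, m) > thres)
    return avg (j, n);
  if (absdiff (k, m) < thres && absdiff (c, n) > thres)
    return avg (k, m);
  if (absdiff (c, d) < thres && absdiff (b, f) > thres)
    return avg (c, d);
  if (absdiff (a, f) < thres && absdiff (b, d) > thres)
    return avg (a, f);
  /* pick up anything not yet set with the plain vertical average */
  return avg (b, e);
}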