const orc_int8 *ORC_RESTRICT ptr5;
const orc_int8 *ORC_RESTRICT ptr6;
const orc_int8 *ORC_RESTRICT ptr7;
- orc_int8 var40;
- orc_int8 var41;
- orc_int8 var42;
- orc_int8 var43;
orc_int8 var44;
orc_int8 var45;
orc_int8 var46;
orc_int8 var68;
orc_int8 var69;
orc_int8 var70;
- orc_int8 var71;
- orc_int8 var72;
- orc_int8 var73;
- orc_int8 var74;
ptr0 = (orc_int8 *) d1;
ptr4 = (orc_int8 *) s1;
ptr6 = (orc_int8 *) s3;
ptr7 = (orc_int8 *) s4;
+ /* 11: loadpb */
+ var44 = 0x00000080; /* 128 or 6.32404e-322f */
/* 13: loadpb */
- var46 = 0x00000080; /* 128 or 6.32404e-322f */
- /* 15: loadpb */
- var47 = 0x00000080; /* 128 or 6.32404e-322f */
- /* 29: loadpb */
- var54 = p1;
- /* 31: loadpb */
- var55 = p1;
+ var45 = 0x00000080; /* 128 or 6.32404e-322f */
+ /* 21: loadpb */
+ var46 = p1;
+ /* 23: loadpb */
+ var47 = p1;
for (i = 0; i < n; i++) {
/* 0: loadb */
- var40 = ptr5[i];
+ var49 = ptr4[i];
/* 1: loadb */
- var41 = ptr6[i];
- /* 2: avgub */
- var57 = ((orc_uint8) var40 + (orc_uint8) var41 + 1) >> 1;
+ var50 = ptr7[i];
+ /* 2: loadb */
+ var51 = ptr6[i];
/* 3: loadb */
- var42 = ptr4[i];
- /* 4: maxub */
- var58 = ORC_MAX ((orc_uint8) var42, (orc_uint8) var57);
- /* 5: loadb */
- var43 = ptr4[i];
+ var52 = ptr5[i];
+ /* 4: avgub */
+ var53 = ((orc_uint8) var52 + (orc_uint8) var51 + 1) >> 1;
+ /* 5: maxub */
+ var54 = ORC_MAX ((orc_uint8) var49, (orc_uint8) var53);
/* 6: minub */
- var59 = ORC_MIN ((orc_uint8) var43, (orc_uint8) var57);
+ var55 = ORC_MIN ((orc_uint8) var49, (orc_uint8) var53);
/* 7: subb */
- var60 = var58 - var59;
- /* 8: loadb */
- var44 = ptr7[i];
- /* 9: maxub */
- var61 = ORC_MAX ((orc_uint8) var44, (orc_uint8) var57);
- /* 10: loadb */
- var45 = ptr7[i];
- /* 11: minub */
- var62 = ORC_MIN ((orc_uint8) var45, (orc_uint8) var57);
- /* 12: subb */
- var63 = var61 - var62;
+ var56 = var54 - var55;
+ /* 8: maxub */
+ var57 = ORC_MAX ((orc_uint8) var50, (orc_uint8) var53);
+ /* 9: minub */
+ var58 = ORC_MIN ((orc_uint8) var50, (orc_uint8) var53);
+ /* 10: subb */
+ var59 = var57 - var58;
+ /* 12: xorb */
+ var60 = var56 ^ var44;
/* 14: xorb */
- var64 = var60 ^ var46;
- /* 16: xorb */
- var65 = var63 ^ var47;
- /* 17: cmpgtsb */
- var66 = (var64 > var65) ? (~0) : 0;
- /* 18: loadb */
- var48 = ptr4[i];
- /* 19: andb */
- var67 = var48 & var66;
- /* 20: loadb */
- var49 = ptr7[i];
- /* 21: andnb */
- var68 = (~var49) & var66;
- /* 22: orb */
- var69 = var67 | var68;
- /* 23: loadb */
- var50 = ptr5[i];
- /* 24: loadb */
- var51 = ptr6[i];
- /* 25: maxub */
- var70 = ORC_MAX ((orc_uint8) var50, (orc_uint8) var51);
- /* 26: loadb */
- var52 = ptr5[i];
- /* 27: loadb */
- var53 = ptr6[i];
- /* 28: minub */
- var71 = ORC_MIN ((orc_uint8) var52, (orc_uint8) var53);
- /* 30: addusb */
- var72 = ORC_CLAMP_UB ((orc_uint8) var70 + (orc_uint8) var54);
- /* 32: subusb */
- var73 = ORC_CLAMP_UB ((orc_uint8) var71 - (orc_uint8) var55);
- /* 33: minub */
- var74 = ORC_MIN ((orc_uint8) var69, (orc_uint8) var72);
- /* 34: maxub */
- var56 = ORC_MAX ((orc_uint8) var74, (orc_uint8) var73);
- /* 35: storeb */
- ptr0[i] = var56;
+ var61 = var59 ^ var45;
+ /* 15: cmpgtsb */
+ var62 = (var60 > var61) ? (~0) : 0;
+ /* 16: andb */
+ var63 = var50 & var62;
+ /* 17: andnb */
+ var64 = (~var62) & var49;
+ /* 18: orb */
+ var65 = var63 | var64;
+ /* 19: maxub */
+ var66 = ORC_MAX ((orc_uint8) var52, (orc_uint8) var51);
+ /* 20: minub */
+ var67 = ORC_MIN ((orc_uint8) var52, (orc_uint8) var51);
+ /* 22: addusb */
+ var68 = ORC_CLAMP_UB ((orc_uint8) var66 + (orc_uint8) var46);
+ /* 24: subusb */
+ var69 = ORC_CLAMP_UB ((orc_uint8) var67 - (orc_uint8) var47);
+ /* 25: minub */
+ var70 = ORC_MIN ((orc_uint8) var65, (orc_uint8) var68);
+ /* 26: maxub */
+ var48 = ORC_MAX ((orc_uint8) var70, (orc_uint8) var69);
+ /* 27: storeb */
+ ptr0[i] = var48;
}
}
const orc_int8 *ORC_RESTRICT ptr5;
const orc_int8 *ORC_RESTRICT ptr6;
const orc_int8 *ORC_RESTRICT ptr7;
- orc_int8 var40;
- orc_int8 var41;
- orc_int8 var42;
- orc_int8 var43;
orc_int8 var44;
orc_int8 var45;
orc_int8 var46;
orc_int8 var68;
orc_int8 var69;
orc_int8 var70;
- orc_int8 var71;
- orc_int8 var72;
- orc_int8 var73;
- orc_int8 var74;
ptr0 = (orc_int8 *) ex->arrays[0];
ptr4 = (orc_int8 *) ex->arrays[4];
ptr6 = (orc_int8 *) ex->arrays[6];
ptr7 = (orc_int8 *) ex->arrays[7];
+ /* 11: loadpb */
+ var44 = 0x00000080; /* 128 or 6.32404e-322f */
/* 13: loadpb */
- var46 = 0x00000080; /* 128 or 6.32404e-322f */
- /* 15: loadpb */
- var47 = 0x00000080; /* 128 or 6.32404e-322f */
- /* 29: loadpb */
- var54 = ex->params[24];
- /* 31: loadpb */
- var55 = ex->params[24];
+ var45 = 0x00000080; /* 128 or 6.32404e-322f */
+ /* 21: loadpb */
+ var46 = ex->params[24];
+ /* 23: loadpb */
+ var47 = ex->params[24];
for (i = 0; i < n; i++) {
/* 0: loadb */
- var40 = ptr5[i];
+ var49 = ptr4[i];
/* 1: loadb */
- var41 = ptr6[i];
- /* 2: avgub */
- var57 = ((orc_uint8) var40 + (orc_uint8) var41 + 1) >> 1;
+ var50 = ptr7[i];
+ /* 2: loadb */
+ var51 = ptr6[i];
/* 3: loadb */
- var42 = ptr4[i];
- /* 4: maxub */
- var58 = ORC_MAX ((orc_uint8) var42, (orc_uint8) var57);
- /* 5: loadb */
- var43 = ptr4[i];
+ var52 = ptr5[i];
+ /* 4: avgub */
+ var53 = ((orc_uint8) var52 + (orc_uint8) var51 + 1) >> 1;
+ /* 5: maxub */
+ var54 = ORC_MAX ((orc_uint8) var49, (orc_uint8) var53);
/* 6: minub */
- var59 = ORC_MIN ((orc_uint8) var43, (orc_uint8) var57);
+ var55 = ORC_MIN ((orc_uint8) var49, (orc_uint8) var53);
/* 7: subb */
- var60 = var58 - var59;
- /* 8: loadb */
- var44 = ptr7[i];
- /* 9: maxub */
- var61 = ORC_MAX ((orc_uint8) var44, (orc_uint8) var57);
- /* 10: loadb */
- var45 = ptr7[i];
- /* 11: minub */
- var62 = ORC_MIN ((orc_uint8) var45, (orc_uint8) var57);
- /* 12: subb */
- var63 = var61 - var62;
+ var56 = var54 - var55;
+ /* 8: maxub */
+ var57 = ORC_MAX ((orc_uint8) var50, (orc_uint8) var53);
+ /* 9: minub */
+ var58 = ORC_MIN ((orc_uint8) var50, (orc_uint8) var53);
+ /* 10: subb */
+ var59 = var57 - var58;
+ /* 12: xorb */
+ var60 = var56 ^ var44;
/* 14: xorb */
- var64 = var60 ^ var46;
- /* 16: xorb */
- var65 = var63 ^ var47;
- /* 17: cmpgtsb */
- var66 = (var64 > var65) ? (~0) : 0;
- /* 18: loadb */
- var48 = ptr4[i];
- /* 19: andb */
- var67 = var48 & var66;
- /* 20: loadb */
- var49 = ptr7[i];
- /* 21: andnb */
- var68 = (~var49) & var66;
- /* 22: orb */
- var69 = var67 | var68;
- /* 23: loadb */
- var50 = ptr5[i];
- /* 24: loadb */
- var51 = ptr6[i];
- /* 25: maxub */
- var70 = ORC_MAX ((orc_uint8) var50, (orc_uint8) var51);
- /* 26: loadb */
- var52 = ptr5[i];
- /* 27: loadb */
- var53 = ptr6[i];
- /* 28: minub */
- var71 = ORC_MIN ((orc_uint8) var52, (orc_uint8) var53);
- /* 30: addusb */
- var72 = ORC_CLAMP_UB ((orc_uint8) var70 + (orc_uint8) var54);
- /* 32: subusb */
- var73 = ORC_CLAMP_UB ((orc_uint8) var71 - (orc_uint8) var55);
- /* 33: minub */
- var74 = ORC_MIN ((orc_uint8) var69, (orc_uint8) var72);
- /* 34: maxub */
- var56 = ORC_MAX ((orc_uint8) var74, (orc_uint8) var73);
- /* 35: storeb */
- ptr0[i] = var56;
+ var61 = var59 ^ var45;
+ /* 15: cmpgtsb */
+ var62 = (var60 > var61) ? (~0) : 0;
+ /* 16: andb */
+ var63 = var50 & var62;
+ /* 17: andnb */
+ var64 = (~var62) & var49;
+ /* 18: orb */
+ var65 = var63 | var64;
+ /* 19: maxub */
+ var66 = ORC_MAX ((orc_uint8) var52, (orc_uint8) var51);
+ /* 20: minub */
+ var67 = ORC_MIN ((orc_uint8) var52, (orc_uint8) var51);
+ /* 22: addusb */
+ var68 = ORC_CLAMP_UB ((orc_uint8) var66 + (orc_uint8) var46);
+ /* 24: subusb */
+ var69 = ORC_CLAMP_UB ((orc_uint8) var67 - (orc_uint8) var47);
+ /* 25: minub */
+ var70 = ORC_MIN ((orc_uint8) var65, (orc_uint8) var68);
+ /* 26: maxub */
+ var48 = ORC_MAX ((orc_uint8) var70, (orc_uint8) var69);
+ /* 27: storeb */
+ ptr0[i] = var48;
}
}
orc_program_add_temporary (p, 1, "t6");
orc_program_add_temporary (p, 1, "t7");
orc_program_add_temporary (p, 1, "t8");
+ orc_program_add_temporary (p, 1, "t9");
+ orc_program_add_temporary (p, 1, "t10");
+ orc_program_add_temporary (p, 1, "t11");
+ orc_program_add_temporary (p, 1, "t12");
- orc_program_append_2 (p, "avgub", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_S3,
+ orc_program_append_2 (p, "loadb", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
+ ORC_VAR_D1);
+ orc_program_append_2 (p, "loadb", 0, ORC_VAR_T2, ORC_VAR_S4, ORC_VAR_D1,
ORC_VAR_D1);
- orc_program_append_2 (p, "maxub", 0, ORC_VAR_T4, ORC_VAR_S1, ORC_VAR_T1,
+ orc_program_append_2 (p, "loadb", 0, ORC_VAR_T3, ORC_VAR_S3, ORC_VAR_D1,
ORC_VAR_D1);
- orc_program_append_2 (p, "minub", 0, ORC_VAR_T5, ORC_VAR_S1, ORC_VAR_T1,
+ orc_program_append_2 (p, "loadb", 0, ORC_VAR_T4, ORC_VAR_S2, ORC_VAR_D1,
ORC_VAR_D1);
- orc_program_append_2 (p, "subb", 0, ORC_VAR_T2, ORC_VAR_T4, ORC_VAR_T5,
+ orc_program_append_2 (p, "avgub", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_T3,
ORC_VAR_D1);
- orc_program_append_2 (p, "maxub", 0, ORC_VAR_T4, ORC_VAR_S4, ORC_VAR_T1,
+ orc_program_append_2 (p, "maxub", 0, ORC_VAR_T8, ORC_VAR_T1, ORC_VAR_T5,
ORC_VAR_D1);
- orc_program_append_2 (p, "minub", 0, ORC_VAR_T5, ORC_VAR_S4, ORC_VAR_T1,
+ orc_program_append_2 (p, "minub", 0, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_T5,
ORC_VAR_D1);
- orc_program_append_2 (p, "subb", 0, ORC_VAR_T3, ORC_VAR_T4, ORC_VAR_T5,
+ orc_program_append_2 (p, "subb", 0, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_T9,
ORC_VAR_D1);
- orc_program_append_2 (p, "xorb", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
+ orc_program_append_2 (p, "maxub", 0, ORC_VAR_T8, ORC_VAR_T2, ORC_VAR_T5,
ORC_VAR_D1);
- orc_program_append_2 (p, "xorb", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_C1,
+ orc_program_append_2 (p, "minub", 0, ORC_VAR_T9, ORC_VAR_T2, ORC_VAR_T5,
ORC_VAR_D1);
- orc_program_append_2 (p, "cmpgtsb", 0, ORC_VAR_T5, ORC_VAR_T2, ORC_VAR_T3,
+ orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_T9,
ORC_VAR_D1);
- orc_program_append_2 (p, "andb", 0, ORC_VAR_T4, ORC_VAR_S1, ORC_VAR_T5,
+ orc_program_append_2 (p, "xorb", 0, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_C1,
ORC_VAR_D1);
- orc_program_append_2 (p, "andnb", 0, ORC_VAR_T5, ORC_VAR_S4, ORC_VAR_T5,
+ orc_program_append_2 (p, "xorb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1,
ORC_VAR_D1);
- orc_program_append_2 (p, "orb", 0, ORC_VAR_T6, ORC_VAR_T4, ORC_VAR_T5,
+ orc_program_append_2 (p, "cmpgtsb", 0, ORC_VAR_T9, ORC_VAR_T6, ORC_VAR_T7,
ORC_VAR_D1);
- orc_program_append_2 (p, "maxub", 0, ORC_VAR_T8, ORC_VAR_S2, ORC_VAR_S3,
+ orc_program_append_2 (p, "andb", 0, ORC_VAR_T8, ORC_VAR_T2, ORC_VAR_T9,
ORC_VAR_D1);
- orc_program_append_2 (p, "minub", 0, ORC_VAR_T7, ORC_VAR_S2, ORC_VAR_S3,
+ orc_program_append_2 (p, "andnb", 0, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T1,
ORC_VAR_D1);
- orc_program_append_2 (p, "addusb", 0, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_P1,
+ orc_program_append_2 (p, "orb", 0, ORC_VAR_T10, ORC_VAR_T8, ORC_VAR_T9,
ORC_VAR_D1);
- orc_program_append_2 (p, "subusb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_P1,
+ orc_program_append_2 (p, "maxub", 0, ORC_VAR_T12, ORC_VAR_T4, ORC_VAR_T3,
ORC_VAR_D1);
- orc_program_append_2 (p, "minub", 0, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T8,
+ orc_program_append_2 (p, "minub", 0, ORC_VAR_T11, ORC_VAR_T4, ORC_VAR_T3,
ORC_VAR_D1);
- orc_program_append_2 (p, "maxub", 0, ORC_VAR_D1, ORC_VAR_T6, ORC_VAR_T7,
+ orc_program_append_2 (p, "addusb", 0, ORC_VAR_T12, ORC_VAR_T12,
+ ORC_VAR_P1, ORC_VAR_D1);
+ orc_program_append_2 (p, "subusb", 0, ORC_VAR_T11, ORC_VAR_T11,
+ ORC_VAR_P1, ORC_VAR_D1);
+ orc_program_append_2 (p, "minub", 0, ORC_VAR_T10, ORC_VAR_T10,
+ ORC_VAR_T12, ORC_VAR_D1);
+ orc_program_append_2 (p, "maxub", 0, ORC_VAR_D1, ORC_VAR_T10, ORC_VAR_T11,
ORC_VAR_D1);
result = orc_program_compile (p);
#include "gstdeinterlacemethod.h"
#include <string.h>
-#ifdef HAVE_ORC
-#include <orc/orc.h>
-#endif
#include "tvtime.h"
// Blended Clip but this give too good results for the CPU to ignore here.
static inline void
-deinterlace_greedy_scanline_c (GstDeinterlaceMethodGreedyL * self,
- const guint8 * m0, const guint8 * t1,
- const guint8 * b1, const guint8 * m2, guint8 * output, gint width)
-{
- gint avg, l2_diff, lp2_diff, max, min, best;
- guint max_comb = self->max_comb;
-
- // L2 == m0
- // L1 == t1
- // L3 == b1
- // LP2 == m2
-
- while (width--) {
- avg = (*t1 + *b1) / 2;
-
- l2_diff = ABS (*m0 - avg);
- lp2_diff = ABS (*m2 - avg);
-
- if (l2_diff > lp2_diff)
- best = *m2;
- else
- best = *m0;
-
- max = MAX (*t1, *b1);
- min = MIN (*t1, *b1);
-
- if (max < 256 - max_comb)
- max += max_comb;
- else
- max = 255;
-
- if (min > max_comb)
- min -= max_comb;
- else
- min = 0;
-
- *output = CLAMP (best, min, max);
-
- // Advance to the next set of pixels.
- output += 1;
- m0 += 1;
- t1 += 1;
- b1 += 1;
- m2 += 1;
- }
-}
-
-static inline void
deinterlace_greedy_scanline_orc (GstDeinterlaceMethodGreedyL * self,
const guint8 * m0, const guint8 * t1,
const guint8 * b1, const guint8 * m2, guint8 * output, gint width)
deinterlace_line_greedy (output, m0, t1, b1, m2, self->max_comb, width);
}
-#ifdef BUILD_X86_ASM
-#include "mmx.h"
-static void
-deinterlace_greedy_scanline_mmx (GstDeinterlaceMethodGreedyL * self,
- const guint8 * m0, const guint8 * t1,
- const guint8 * b1, const guint8 * m2, guint8 * output, gint width)
-{
- mmx_t MaxComb;
- mmx_t ShiftMask;
-
- // How badly do we let it weave? 0-255
- MaxComb.ub[0] = self->max_comb;
- MaxComb.ub[1] = self->max_comb;
- MaxComb.ub[2] = self->max_comb;
- MaxComb.ub[3] = self->max_comb;
- MaxComb.ub[4] = self->max_comb;
- MaxComb.ub[5] = self->max_comb;
- MaxComb.ub[6] = self->max_comb;
- MaxComb.ub[7] = self->max_comb;
-
- ShiftMask.ub[0] = 0x7f;
- ShiftMask.ub[1] = 0x7f;
- ShiftMask.ub[2] = 0x7f;
- ShiftMask.ub[3] = 0x7f;
- ShiftMask.ub[4] = 0x7f;
- ShiftMask.ub[5] = 0x7f;
- ShiftMask.ub[6] = 0x7f;
- ShiftMask.ub[7] = 0x7f;
-
- // L2 == m0
- // L1 == t1
- // L3 == b1
- // LP2 == m2
-
- movq_m2r (MaxComb, mm6);
-
- for (; width > 7; width -= 8) {
- movq_m2r (*t1, mm1); // L1
- movq_m2r (*m0, mm2); // L2
- movq_m2r (*b1, mm3); // L3
- movq_m2r (*m2, mm0); // LP2
-
- // average L1 and L3 leave result in mm4
- movq_r2r (mm1, mm4); // L1
- movq_r2r (mm3, mm5); // L3
- psrlw_i2r (1, mm4); // L1/2
- pand_m2r (ShiftMask, mm4);
- psrlw_i2r (1, mm5); // L3/2
- pand_m2r (ShiftMask, mm5);
- paddusb_r2r (mm5, mm4); // (L1 + L3) / 2
-
- // get abs value of possible L2 comb
- movq_r2r (mm2, mm7); // L2
- psubusb_r2r (mm4, mm7); // L2 - avg
- movq_r2r (mm4, mm5); // avg
- psubusb_r2r (mm2, mm5); // avg - L2
- por_r2r (mm7, mm5); // abs(avg-L2)
-
- // get abs value of possible LP2 comb
- movq_r2r (mm0, mm7); // LP2
- psubusb_r2r (mm4, mm7); // LP2 - avg
- psubusb_r2r (mm0, mm4); // avg - LP2
- por_r2r (mm7, mm4); // abs(avg-LP2)
-
- // use L2 or LP2 depending upon which makes smaller comb
- psubusb_r2r (mm5, mm4); // see if it goes to zero
- psubusb_r2r (mm5, mm5); // 0
- pcmpeqb_r2r (mm5, mm4); // if (mm4=0) then FF else 0
- pcmpeqb_r2r (mm4, mm5); // opposite of mm4
-
- // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55
- pand_r2r (mm2, mm5); // use L2 if mm5 == ff, else 0
- pand_r2r (mm0, mm4); // use LP2 if mm4 = ff, else 0
- por_r2r (mm5, mm4); // may the best win
-
- // Now lets clip our chosen value to be not outside of the range
- // of the high/low range L1-L3 by more than abs(L1-L3)
- // This allows some comb but limits the damages and also allows more
- // detail than a boring oversmoothed clip.
-
- movq_r2r (mm1, mm2); // copy L1
- psubusb_r2r (mm3, mm2); // - L3, with saturation
- paddusb_r2r (mm3, mm2); // now = Max(L1,L3)
-
- pcmpeqb_r2r (mm7, mm7); // all ffffffff
- psubusb_r2r (mm1, mm7); // - L1
- paddusb_r2r (mm7, mm3); // add, may sat at fff..
- psubusb_r2r (mm7, mm3); // now = Min(L1,L3)
-
- // allow the value to be above the high or below the low by amt of MaxComb
- paddusb_r2r (mm6, mm2); // increase max by diff
- psubusb_r2r (mm6, mm3); // lower min by diff
-
- psubusb_r2r (mm3, mm4); // best - Min
- paddusb_r2r (mm3, mm4); // now = Max(best,Min(L1,L3)
-
- pcmpeqb_r2r (mm7, mm7); // all ffffffff
- psubusb_r2r (mm4, mm7); // - Max(best,Min(best,L3)
- paddusb_r2r (mm7, mm2); // add may sat at FFF..
- psubusb_r2r (mm7, mm2); // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped
-
- movq_r2m (mm2, *output); // move in our clipped best
-
- // Advance to the next set of pixels.
- output += 8;
- m0 += 8;
- t1 += 8;
- b1 += 8;
- m2 += 8;
- }
- emms ();
- if (width > 0)
- deinterlace_greedy_scanline_c (self, m0, t1, b1, m2, output, width);
-}
-
-#include "sse.h"
-
-static void
-deinterlace_greedy_scanline_mmxext (GstDeinterlaceMethodGreedyL *
- self, const guint8 * m0, const guint8 * t1, const guint8 * b1,
- const guint8 * m2, guint8 * output, gint width)
-{
- mmx_t MaxComb;
-
- // How badly do we let it weave? 0-255
- MaxComb.ub[0] = self->max_comb;
- MaxComb.ub[1] = self->max_comb;
- MaxComb.ub[2] = self->max_comb;
- MaxComb.ub[3] = self->max_comb;
- MaxComb.ub[4] = self->max_comb;
- MaxComb.ub[5] = self->max_comb;
- MaxComb.ub[6] = self->max_comb;
- MaxComb.ub[7] = self->max_comb;
-
- // L2 == m0
- // L1 == t1
- // L3 == b1
- // LP2 == m2
-
- movq_m2r (MaxComb, mm6);
-
- for (; width > 7; width -= 8) {
- movq_m2r (*t1, mm1); // L1
- movq_m2r (*m0, mm2); // L2
- movq_m2r (*b1, mm3); // L3
- movq_m2r (*m2, mm0); // LP2
-
- // average L1 and L3 leave result in mm4
- movq_r2r (mm1, mm4); // L1
- pavgb_r2r (mm3, mm4); // (L1 + L3)/2
-
- // get abs value of possible L2 comb
- movq_r2r (mm2, mm7); // L2
- psubusb_r2r (mm4, mm7); // L2 - avg
- movq_r2r (mm4, mm5); // avg
- psubusb_r2r (mm2, mm5); // avg - L2
- por_r2r (mm7, mm5); // abs(avg-L2)
-
- // get abs value of possible LP2 comb
- movq_r2r (mm0, mm7); // LP2
- psubusb_r2r (mm4, mm7); // LP2 - avg
- psubusb_r2r (mm0, mm4); // avg - LP2
- por_r2r (mm7, mm4); // abs(avg-LP2)
-
- // use L2 or LP2 depending upon which makes smaller comb
- psubusb_r2r (mm5, mm4); // see if it goes to zero
- pxor_r2r (mm5, mm5); // 0
- pcmpeqb_r2r (mm5, mm4); // if (mm4=0) then FF else 0
- pcmpeqb_r2r (mm4, mm5); // opposite of mm4
-
- // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55
- pand_r2r (mm2, mm5); // use L2 if mm5 == ff, else 0
- pand_r2r (mm0, mm4); // use LP2 if mm4 = ff, else 0
- por_r2r (mm5, mm4); // may the best win
-
- // Now lets clip our chosen value to be not outside of the range
- // of the high/low range L1-L3 by more than abs(L1-L3)
- // This allows some comb but limits the damages and also allows more
- // detail than a boring oversmoothed clip.
-
- movq_r2r (mm1, mm2); // copy L1
- pmaxub_r2r (mm3, mm2); // now = Max(L1,L3)
-
- pminub_r2r (mm1, mm3); // now = Min(L1,L3)
-
- // allow the value to be above the high or below the low by amt of MaxComb
- paddusb_r2r (mm6, mm2); // increase max by diff
- psubusb_r2r (mm6, mm3); // lower min by diff
-
-
- pmaxub_r2r (mm3, mm4); // now = Max(best,Min(L1,L3)
- pminub_r2r (mm4, mm2); // now = Min( Max(best, Min(L1,L3)), L2 )=L2 clipped
-
- movq_r2m (mm2, *output); // move in our clipped best
-
- // Advance to the next set of pixels.
- output += 8;
- m0 += 8;
- t1 += 8;
- b1 += 8;
- m2 += 8;
- }
- emms ();
-
- if (width > 0)
- deinterlace_greedy_scanline_c (self, m0, t1, b1, m2, output, width);
-}
-
-#endif
-
static void
deinterlace_frame_di_greedy_packed (GstDeinterlaceMethod * method,
const GstDeinterlaceField * history, guint history_count,
{
GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
GObjectClass *gobject_class = (GObjectClass *) klass;
-#ifdef BUILD_X86_ASM
- guint cpu_flags =
- orc_target_get_default_flags (orc_target_get_by_name ("mmx"));
-#endif
gobject_class->set_property = gst_deinterlace_method_greedy_l_set_property;
gobject_class->get_property = gst_deinterlace_method_greedy_l_get_property;
dim_class->deinterlace_frame_rgb = deinterlace_frame_di_greedy_packed;
dim_class->deinterlace_frame_bgr = deinterlace_frame_di_greedy_packed;
-#ifdef BUILD_X86_ASM
- if (cpu_flags & ORC_TARGET_MMX_MMXEXT) {
- klass->scanline = deinterlace_greedy_scanline_mmxext;
- } else if (cpu_flags & ORC_TARGET_MMX_MMX) {
- klass->scanline = deinterlace_greedy_scanline_mmx;
- } else {
- klass->scanline = deinterlace_greedy_scanline_c;
- }
-#else
- klass->scanline = deinterlace_greedy_scanline_c;
klass->scanline = deinterlace_greedy_scanline_orc;
-#endif
}
static void