Tizen 2.0 Release
[framework/multimedia/gst-plugins-good0.10.git] / gst / deinterlace / tvtime / tomsmocomp / SearchLoopTop.inc
1 // -*- c++ -*-
2
3 unsigned char* pDest;         // write cursor; set from pWeaveDest further down
4 const unsigned char* pSrcP;   // read cursor; set from pWeaveSrcP further down
5 const unsigned char* pSrc;    // read cursor; set from pWeaveSrc further down
6 const unsigned char* pBob;    // read cursor; set from pCopySrc further down
7 const unsigned char* pBobP;   // read cursor; set from pCopySrcP further down
8
9 // long is int32 on ARCH_386, int64 on ARCH_AMD64. Declaring the pitches and
10 // counters below as long saves a lot of xor's to delete 64bit garbage.
11
12 #if defined(DBL_RESIZE) || defined(USE_FOR_DSCALER)
13 long        src_pitch2 = src_pitch;                     // even & odd lines are not interleaved in DScaler
14 #else
15 long        src_pitch2 = 2 * src_pitch;         // even & odd lines are interleaved in Avisynth
16 #endif
17
18
19 long        dst_pitch2 = 2 * dst_pitch;         // twice the destination pitch
20 long        y;                                  // counter of the row loop (runs from 1 to FldHeight-2)
21
22 long     Last8;                                 // offset of the last byte group in a row: rowsize-8 (asm path) or rowsize-4 (C path)
23
24         pSrc  = pWeaveSrc;                      // points 1 weave line above
25         pSrcP = pWeaveSrcP;                     // likewise points 1 weave line above
26         // Pick the starting destination offset for the active build flags.
27 #ifdef DBL_RESIZE
28                 
29 #ifdef USE_VERTICAL_FILTER
30         pDest = pWeaveDest + dst_pitch2;        // start 2 * dst_pitch bytes past pWeaveDest
31 #else
32         pDest = pWeaveDest + 3*dst_pitch;       // start 3 * dst_pitch bytes past pWeaveDest
33 #endif
34
35 #else  // !DBL_RESIZE
36
37 #ifdef USE_VERTICAL_FILTER
38         pDest = pWeaveDest + dst_pitch;         // start 1 * dst_pitch bytes past pWeaveDest
39 #else
40         pDest = pWeaveDest + dst_pitch2;        // start 2 * dst_pitch bytes past pWeaveDest
41 #endif
42
43 #endif // DBL_RESIZE
44
45         if (TopFirst)
46         {
47                 pBob = pCopySrc + src_pitch2;      // remember one weave line just copied previously
48                 pBobP = pCopySrcP + src_pitch2;    // same offset applied to pCopySrcP
49         }
50         else
51         {
52                 pBob =  pCopySrc;                  // no offset when TopFirst is false
53                 pBobP =  pCopySrcP;                // no offset when TopFirst is false
54         }
55
56 #ifndef IS_C
57
58 #ifndef _pBob               // define the aliases only if they are not defined yet
59 #define _pBob       "%0"    // each alias is a positional operand reference (%N) spliced into the asm template below
60 #define _src_pitch2 "%1"
61 #define _ShiftMask  "%2"    // passed as the mask argument of V_PAVGB
62 #define _pDest      "%3"
63 #define _dst_pitchw "%4"
64 #define _Last8      "%5"
65 #define _pSrc       "%6"
66 #define _pSrcP      "%7"
67 #define _pBobP      "%8"
68 #define _DiffThres  "%9"
69 #define _Min_Vals   "%10"
70 #define _Max_Vals   "%11"
71 #define _FOURS      "%12"
72 #define _TENS       "%13"
73 #define _ONES       "%14"
74 #define _UVMask     "%15"
75 #define _Max_Mov    "%16"
76 #define _YMask      "%17"
77 #define _oldbx      "%18"   // slot the asm uses to save XBX (-fPIC)
78 #endif // _pBob -- NOTE(review): the numbering must match the operand list that closes the __asm__ statement, which is not in this file; verify before renumbering
79         Last8 = (rowsize-8);
80
81         for (y=1; y < FldHeight-1; y++) 
82         {       
83           long  dst_pitchw = dst_pitch; // local copy of dst_pitch so the asm can reference it
84           int64_t Max_Mov   = 0x0404040404040404ull; // 0x04 in each of the 8 bytes
85           int64_t DiffThres = 0x0f0f0f0f0f0f0f0full; // 0x0f in each of the 8 bytes
86           int64_t YMask     = 0x00ff00ff00ff00ffull; // keeps only luma
87           int64_t UVMask    = 0xff00ff00ff00ff00ull; // keeps only chroma
88           int64_t TENS      = 0x0a0a0a0a0a0a0a0aull; // 10 in each of the 8 bytes
89           int64_t FOURS     = 0x0404040404040404ull; // 4 in each of the 8 bytes
90           int64_t ONES      = 0x0101010101010101ull; // 1 in each of the 8 bytes
91           int64_t Min_Vals  = 0x0000000000000000ull; // starts at zero
92           int64_t Max_Vals  = 0x0000000000000000ull; // starts at zero
93           int64_t ShiftMask = 0xfefffefffefffeffull; // presumably bound to _ShiftMask, the mask handed to V_PAVGB -- confirm against the asm operand list
94           // NOTE(review): the UVMask and ShiftMask literals exceed INT64_MAX, so storing them in int64_t relies on implementation-defined conversion; only the bit pattern matters here.
95           long oldbx; // scratch slot; the asm stores XBX here first (see "Save XBX (-fPIC)")
96
97                 // pretend it's indented -->>
98         __asm__ __volatile__
99             (
100              // Loop general reg usage
101              //
102              // XAX - pBobP, then pDest 
103              // XBX - pBob
104              // XCX - src_pitch2
105              // XDX - current offset
106              // XDI - prev weave pixels, 1 line up
107              // XSI - next weave pixels, 1 line up
108
109              // Save "XBX" (-fPIC)
110              MOVX" %%"XBX", "_oldbx"\n\t"
111              
112              // simple bob first 8 bytes
113              MOVX"      "_pBob",        %%"XBX"\n\t"
114              MOVX"      "_src_pitch2",  %%"XCX"\n\t"
115
116 #ifdef USE_VERTICAL_FILTER
117              "movq          (%%"XBX"),        %%mm0\n\t"
118              "movq          (%%"XBX", %%"XCX"), %%mm1\n\t" //, qword ptr["XBX"+"XCX"]
119              "movq          %%mm0,          %%mm2\n\t"
120              V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask)            // halfway between
121              V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask)            // 1/4 way
122              V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask)            // 3/4 way
123              MOVX"              "_pDest",       %%"XDI"\n\t"
124              MOVX"              "_dst_pitchw",  %%"XAX"\n\t"
125              V_MOVNTQ   ("(%%"XDI")", "%%mm0")
126              V_MOVNTQ   ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1
127
128              // simple bob last 8 bytes (vertical-filter variant: writes two output lines)
129              MOVX"              "_Last8", %%"XDX"\n\t"                        // XDX = offset of the last 8 bytes in the row
130              LEAX"              (%%"XBX", %%"XDX"), %%"XSI"\n\t"  // XSI = XBX + XDX, i.e. pBob + Last8
131              "movq          (%%"XSI"), %%mm0\n\t"                              // mm0 = 8 bytes of the current bob line
132              "movq          (%%"XSI", %%"XCX"), %%mm1\n\t"    // mm1 = 8 bytes one src_pitch2 further on
133              "movq          %%mm0, %%mm2\n\t"
134              V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask)            // halfway between
135              V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask)            // 1/4 way
136              V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask)            // 3/4 way
137              ADDX"              %%"XDX", %%"XDI"\n\t"                                           // XDI = pDest + Last8: last 8 bytes of dest
138              V_MOVNTQ   ("(%%"XDI")", "%%mm0")                          // store through XDI as a memory operand (the bare register form overwrote the pointer instead of the destination bytes)
139              V_MOVNTQ   ("(%%"XDI", %%"XAX")", "%%mm1") // second output line, dst_pitchw (XAX) further on
140
141 #else
142              "movq      (%%"XBX"), %%mm0\n\t"
143              //         pavgb   mm0, qword ptr["XBX"+"XCX"]
144              V_PAVGB ("%%mm0", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XBX"+"XCX"], mm2, ShiftMask)
145              MOVX"              "_pDest", %%"XDI"\n\t"
146              V_MOVNTQ   ("(%%"XDI")", "%%mm0")
147
148              // simple bob last 8 bytes
149              MOVX"              "_Last8", %%"XDX"\n\t"
150              LEAX"              (%%"XBX", %%"XDX"), %%"XSI"\n\t" //"XSI", ["XBX"+"XDX"]
151              "movq          (%%"XSI"), %%mm0\n\t"
152              //         pavgb   mm0, qword ptr["XSI"+"XCX"]
153              V_PAVGB    ("%%mm0", "(%%"XSI", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XSI"+"XCX"], mm2, ShiftMask)
154              V_MOVNTQ   ("(%%"XDI", %%"XDX")", "%%mm0") // qword ptr["XDI"+"XDX"], mm0)
155 #endif
156              // now loop and get the middle qwords
157              MOVX"              "_pSrc", %%"XSI"\n\t"                          // XSI = pSrc (next weave pixels, 1 line up)
158              MOVX"              "_pSrcP", %%"XDI"\n\t"                         // XDI = pSrcP (prev weave pixels, 1 line up)
159              MOVX"              $8, %%"XDX"\n\t"                                // curr offset into all lines
160
161              "1:\n\t"                                                           // local label 1; the branch that targets it is not in this file
162              MOVX"              "_pBobP", %%"XAX"\n\t"                         // XAX = pBobP ...
163              ADDX"              $8, %%"XDI"\n\t"                                // step the three line pointers on by 8 bytes
164              ADDX"              $8, %%"XSI"\n\t"
165              ADDX"              $8, %%"XBX"\n\t"
166              ADDX"              %%"XDX", %%"XAX"\n\t"                          // ... plus the current offset
167
168 #ifdef USE_STRANGE_BOB
169 #include "StrangeBob.inc"
170 #else
171 #include "WierdBob.inc"
172 #endif
173
174              // For non-SSE2:
175              // throughout most of the rest of this loop we will maintain
176              // mm4             our min bob value
177              // mm5             best weave pixels so far
178              // mm6             our max Bob value 
179              // mm7             best weighted pixel ratings so far
180              
181              // We will keep a slight bias to using the weave pixels
182              // from the current location, by rating them by the min distance
183              // from the Bob value instead of the avg distance from that value.
184              // our best and only rating so far
185              "pcmpeqb   %%mm7, %%mm7\n\t"                       // ffff, say we didn't find anything good yet
186
187 #else
188         Last8 = (rowsize - 4);
189
190         for (y=1; y < FldHeight-1; y++)
191         {
192           #ifdef USE_STRANGE_BOB
193           long DiffThres = 0x0f;
194           #endif
195
196           #ifndef SKIP_SEARCH
197           long weave[2], MaxVals[2], MinVals[2];
198           #endif
199
200           long diff[2], best[2], avg[2], diff2[2], out[2], x;
201
202 #ifdef USE_VERTICAL_FILTER
203              pDest[0] = (3 * pBob[0] + pBob[src_pitch2]) / 4;                    // simple bob first 4 bytes, upper output line: 3/4 of this line + 1/4 of the line src_pitch2 further on
204              pDest[1] = (3 * pBob[1] + pBob[src_pitch2 + 1]) / 4;
205              pDest[2] = (3 * pBob[2] + pBob[src_pitch2 + 2]) / 4;
206              pDest[3] = (3 * pBob[3] + pBob[src_pitch2 + 3]) / 4;
207              pDest[dst_pitchw] = (pBob[0] + 3 * pBob[src_pitch2]) / 4;           // lower output line (dst_pitchw further on): 1/4 + 3/4
208              pDest[dst_pitchw + 1] = (pBob[1] + 3 * pBob[src_pitch2 + 1]) / 4;
209              pDest[dst_pitchw + 2] = (pBob[2] + 3 * pBob[src_pitch2 + 2]) / 4;
210              pDest[dst_pitchw + 3] = (pBob[3] + 3 * pBob[src_pitch2 + 3]) / 4;
211
212              // simple bob last 4 bytes (Last8 is rowsize - 4 in the C path)
213              pDest[Last8] = (3 * pBob[Last8] + pBob[Last8 + src_pitch2]) / 4;
214              pDest[Last8 + 1] = (3 * pBob[Last8 + 1] + pBob[Last8 + src_pitch2 + 1]) / 4;
215              pDest[Last8 + 2] = (3 * pBob[Last8 + 2] + pBob[Last8 + src_pitch2 + 2]) / 4;
216              pDest[Last8 + 3] = (3 * pBob[Last8 + 3] + pBob[Last8 + src_pitch2 + 3]) / 4;
217              pDest[Last8 + dst_pitchw] = (pBob[Last8] + 3 * pBob[Last8 + src_pitch2]) / 4;             // destination rows are dst_pitchw apart, as for the first 4 bytes; src_pitch2 only applies to pBob
218              pDest[Last8 + dst_pitchw + 1] = (pBob[Last8 + 1] + 3 * pBob[Last8 + src_pitch2 + 1]) / 4;
219              pDest[Last8 + dst_pitchw + 2] = (pBob[Last8 + 2] + 3 * pBob[Last8 + src_pitch2 + 2]) / 4;
220              pDest[Last8 + dst_pitchw + 3] = (pBob[Last8 + 3] + 3 * pBob[Last8 + src_pitch2 + 3]) / 4;
221 #else
222              pDest[0] = (pBob[0] + pBob[src_pitch2]) / 2;       // simple bob first 4 bytes: average each byte with the same column one src_pitch2 further on
223              pDest[1] = (pBob[1] + pBob[src_pitch2 + 1]) / 2;
224              pDest[2] = (pBob[2] + pBob[src_pitch2 + 2]) / 2;
225              pDest[3] = (pBob[3] + pBob[src_pitch2 + 3]) / 2;
226
227              // simple bob last 4 bytes (Last8 is rowsize - 4 in the C path)
228              pDest[Last8] = (pBob[Last8] + pBob[Last8 + src_pitch2]) / 2;
229              pDest[Last8 + 1] = (pBob[Last8 + 1] + pBob[Last8 + src_pitch2 + 1]) / 2;
230              pDest[Last8 + 2] = (pBob[Last8 + 2] + pBob[Last8 + src_pitch2 + 2]) / 2;
231              pDest[Last8 + 3] = (pBob[Last8 + 3] + pBob[Last8 + src_pitch2 + 3]) / 2;
232 #endif
233
234              pBob += 4;
235              pBobP += 4;
236              pSrc += 4;
237              pSrcP += 4;
238
239              for (x=4; x < Last8; x += 2) {
240
241 #ifdef USE_STRANGE_BOB
242 #include "StrangeBob.inc"
243 #else
244 #include "WierdBob.inc"
245 #endif
246
247              // We will keep a slight bias to using the weave pixels
248              // from the current location, by rating them by the min distance
249              // from the Bob value instead of the avg distance from that value.
250              // our best and only rating so far
251              diff[0] = diff[1] = 255;
252
253
254 #endif