4 const unsigned char* pSrcP;
5 const unsigned char* pSrc;
6 const unsigned char* pBob;
7 const unsigned char* pBobP;
9 // long is int32 on ARCH_386, int64 on ARCH_AMD64. Declaring it this way
10 // saves a lot of xors otherwise needed to clear 64-bit garbage.
12 #if defined(DBL_RESIZE) || defined(USE_FOR_DSCALER)
13 long src_pitch2 = src_pitch; // even & odd lines are not interleaved in DScaler
15 long src_pitch2 = 2 * src_pitch; // even & odd lines are interleaved in Avisynth
19 long dst_pitch2 = 2 * dst_pitch;
24 pSrc = pWeaveSrc; // points 1 weave line above
25 pSrcP = pWeaveSrcP; // "
29 #ifdef USE_VERTICAL_FILTER
30 pDest = pWeaveDest + dst_pitch2;
32 pDest = pWeaveDest + 3*dst_pitch;
37 #ifdef USE_VERTICAL_FILTER
38 pDest = pWeaveDest + dst_pitch;
40 pDest = pWeaveDest + dst_pitch2;
47 pBob = pCopySrc + src_pitch2; // remember one weave line just copied previously
48 pBobP = pCopySrcP + src_pitch2;
60 #define _src_pitch2 "%1"
61 #define _ShiftMask "%2"
63 #define _dst_pitchw "%4"
68 #define _DiffThres "%9"
69 #define _Min_Vals "%10"
70 #define _Max_Vals "%11"
75 #define _Max_Mov "%16"
81 for (y=1; y < FldHeight-1; y++)
83 long dst_pitchw = dst_pitch; // local stor so asm can ref
84 int64_t Max_Mov = 0x0404040404040404ull;
85 int64_t DiffThres = 0x0f0f0f0f0f0f0f0full;
86 int64_t YMask = 0x00ff00ff00ff00ffull; // keeps only luma
87 int64_t UVMask = 0xff00ff00ff00ff00ull; // keeps only chroma
88 int64_t TENS = 0x0a0a0a0a0a0a0a0aull;
89 int64_t FOURS = 0x0404040404040404ull;
90 int64_t ONES = 0x0101010101010101ull;
91 int64_t Min_Vals = 0x0000000000000000ull;
92 int64_t Max_Vals = 0x0000000000000000ull;
93 int64_t ShiftMask = 0xfefffefffefffeffull;
97 // pretend it's indented -->>
100 // Loop general reg usage
102 // XAX - pBobP, then pDest
105 // XDX - current offset
106 // XDI - prev weave pixels, 1 line up
107 // XSI - next weave pixels, 1 line up
109 // Save "XBX" (-fPIC)
110 MOVX" %%"XBX", "_oldbx"\n\t"
112 // simple bob first 8 bytes
113 MOVX" "_pBob", %%"XBX"\n\t"
114 MOVX" "_src_pitch2", %%"XCX"\n\t"
116 #ifdef USE_VERTICAL_FILTER
117 "movq (%%"XBX"), %%mm0\n\t"
118 "movq (%%"XBX", %%"XCX"), %%mm1\n\t" //, qword ptr["XBX"+"XCX"]
119 "movq %%mm0, %%mm2\n\t"
120 V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // halfway between
121 V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask) // 1/4 way
122 V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask) // 3/4 way
123 MOVX" "_pDest", %%"XDI"\n\t"
124 MOVX" "_dst_pitchw", %%"XAX"\n\t"
125 V_MOVNTQ ("(%%"XDI")", "%%mm0")
126 V_MOVNTQ ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1
128 // simple bob last 8 bytes
129 MOVX" "_Last8", %%"XDX"\n\t"
130 LEAX" (%%"XBX", %%"XDX"), %%"XSI"\n\t" // ["XBX"+"XDX"]
131 "movq (%%"XSI"), %%mm0\n\t"
132 "movq (%%"XSI", %%"XCX"), %%mm1\n\t" // qword ptr["XSI"+"XCX"]
133 "movq %%mm0, %%mm2\n\t"
134 V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // halfway between
135 V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask) // 1/4 way
136 V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask) // 3/4 way
137 ADDX" %%"XDX", %%"XDI"\n\t" // last 8 bytes of dest
138 V_MOVNTQ ("%%"XDI"", "%%mm0")
139 V_MOVNTQ ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1)
142 "movq (%%"XBX"), %%mm0\n\t"
143 // pavgb mm0, qword ptr["XBX"+"XCX"]
144 V_PAVGB ("%%mm0", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XBX"+"XCX"], mm2, ShiftMask)
145 MOVX" "_pDest", %%"XDI"\n\t"
146 V_MOVNTQ ("(%%"XDI")", "%%mm0")
148 // simple bob last 8 bytes
149 MOVX" "_Last8", %%"XDX"\n\t"
150 LEAX" (%%"XBX", %%"XDX"), %%"XSI"\n\t" //"XSI", ["XBX"+"XDX"]
151 "movq (%%"XSI"), %%mm0\n\t"
152 // pavgb mm0, qword ptr["XSI"+"XCX"]
153 V_PAVGB ("%%mm0", "(%%"XSI", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XSI"+"XCX"], mm2, ShiftMask)
154 V_MOVNTQ ("(%%"XDI", %%"XDX")", "%%mm0") // qword ptr["XDI"+"XDX"], mm0)
156 // now loop and get the middle qwords
157 MOVX" "_pSrc", %%"XSI"\n\t"
158 MOVX" "_pSrcP", %%"XDI"\n\t"
159 MOVX" $8, %%"XDX"\n\t" // curr offset into all lines
162 MOVX" "_pBobP", %%"XAX"\n\t"
163 ADDX" $8, %%"XDI"\n\t"
164 ADDX" $8, %%"XSI"\n\t"
165 ADDX" $8, %%"XBX"\n\t"
166 ADDX" %%"XDX", %%"XAX"\n\t"
168 #ifdef USE_STRANGE_BOB
169 #include "StrangeBob.inc"
171 #include "WierdBob.inc"
175 // Throughout most of the rest of this loop we will maintain
176 // mm4 our min bob value
177 // mm5 best weave pixels so far
178 // mm6 our max Bob value
179 // mm7 best weighted pixel ratings so far
181 // We will keep a slight bias to using the weave pixels
182 // from the current location, by rating them by the min distance
183 // from the Bob value instead of the avg distance from that value.
184 // our best and only rating so far
185 "pcmpeqb %%mm7, %%mm7\n\t" // ffff, say we didn't find anything good yet
188 Last8 = (rowsize - 4);
190 for (y=1; y < FldHeight-1; y++)
192 #ifdef USE_STRANGE_BOB
193 long DiffThres = 0x0f;
197 long weave[2], MaxVals[2], MinVals[2];
200 long diff[2], best[2], avg[2], diff2[2], out[2], x;
202 #ifdef USE_VERTICAL_FILTER
203 pDest[0] = (3 * pBob[0] + pBob[src_pitch2]) / 4;
204 pDest[1] = (3 * pBob[1] + pBob[src_pitch2 + 1]) / 4;
205 pDest[2] = (3 * pBob[2] + pBob[src_pitch2 + 2]) / 4;
206 pDest[3] = (3 * pBob[3] + pBob[src_pitch2 + 3]) / 4;
207 pDest[dst_pitchw] = (pBob[0] + 3 * pBob[src_pitch2]) / 4;
208 pDest[dst_pitchw + 1] = (pBob[1] + 3 * pBob[src_pitch2 + 1]) / 4;
209 pDest[dst_pitchw + 2] = (pBob[2] + 3 * pBob[src_pitch2 + 2]) / 4;
210 pDest[dst_pitchw + 3] = (pBob[3] + 3 * pBob[src_pitch2 + 3]) / 4;
212 // simple bob last 4 bytes (offsets Last8 .. Last8 + 3)
213 pDest[Last8] = (3 * pBob[Last8] + pBob[Last8 + src_pitch2]) / 4;
214 pDest[Last8 + 1] = (3 * pBob[Last8 + 1] + pBob[Last8 + src_pitch2 + 1]) / 4;
215 pDest[Last8 + 2] = (3 * pBob[Last8 + 2] + pBob[Last8 + src_pitch2 + 2]) / 4;
216 pDest[Last8 + 3] = (3 * pBob[Last8 + 3] + pBob[Last8 + src_pitch2 + 3]) / 4;
217 pDest[Last8 + src_pitch2] = (pBob[Last8] + 3 * pBob[Last8 + src_pitch2]) / 4;
218 pDest[Last8 + src_pitch2 + 1] = (pBob[Last8 + 1] + 3 * pBob[Last8 + src_pitch2 + 1]) / 4;
219 pDest[Last8 + src_pitch2 + 2] = (pBob[Last8 + 2] + 3 * pBob[Last8 + src_pitch2 + 2]) / 4;
220 pDest[Last8 + src_pitch2 + 3] = (pBob[Last8 + 3] + 3 * pBob[Last8 + src_pitch2 + 3]) / 4;
222 pDest[0] = (pBob[0] + pBob[src_pitch2 + 1]) / 2;
223 pDest[1] = (pBob[1] + pBob[src_pitch2 + 1]) / 2;
224 pDest[2] = (pBob[2] + pBob[src_pitch2 + 2]) / 2;
225 pDest[3] = (pBob[3] + pBob[src_pitch2 + 3]) / 2;
227 // simple bob last 4 bytes (offsets Last8 .. Last8 + 3)
228 pDest[Last8] = (pBob[Last8] + pBob[Last8 + src_pitch2]) / 2;
229 pDest[Last8 + 1] = (pBob[Last8 + 1] + pBob[Last8 + src_pitch2 + 1]) / 2;
230 pDest[Last8 + 2] = (pBob[Last8 + 2] + pBob[Last8 + src_pitch2 + 2]) / 2;
231 pDest[Last8 + 3] = (pBob[Last8 + 3] + pBob[Last8 + src_pitch2 + 3]) / 2;
239 for (x=4; x < Last8; x += 2) {
241 #ifdef USE_STRANGE_BOB
242 #include "StrangeBob.inc"
244 #include "WierdBob.inc"
247 // We will keep a slight bias to using the weave pixels
248 // from the current location, by rating them by the min distance
249 // from the Bob value instead of the avg distance from that value.
250 // our best and only rating so far
251 diff[0] = diff[1] = 255;