2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less self-invented filters, so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once)
66 (the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use the Subversion log
77 #include "libavutil/avutil.h"
89 //#define DEBUG_BRIGHTNESS
90 #include "postprocess.h"
91 #include "postprocess_internal.h"
/* Return the libpostproc version number, i.e. the LIBPOSTPROC_VERSION_INT constant. */
93 unsigned postproc_version(void)
95 return LIBPOSTPROC_VERSION_INT;
/* Size of the stack buffer into which pp_get_mode_by_name_and_quality() copies the mode string. */
102 #define GET_MODE_BUFFER_SIZE 500
/* Length of the per-filter array that collects unrecognized option strings; collection
 * stops at OPTIONS_ARRAY_SIZE-1 entries so that a NULL terminator always fits. */
103 #define OPTIONS_ARRAY_SIZE 10
/* NOTE(review): TEMP_STRIDE is not referenced in the visible part of this file;
 * presumably it is consumed by the included templates -- confirm. */
105 #define TEMP_STRIDE 8
106 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
/* Packed 64-bit constants, only defined for x86 builds (NN is hexadecimal):
 *   wNN - the value 0x00NN repeated in each of the four 16-bit lanes
 *   bNN - the byte 0xNN repeated in each of the eight bytes */
108 #if defined(ARCH_X86)
109 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
110 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
111 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
112 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
113 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
114 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
115 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
116 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
/* Integer constant (20) named after the deringing filter; its consumer is not visible
 * in this part of the file. */
119 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
/* Table of all filters selectable by name. pp_get_mode_by_name_and_quality() walks it
 * until the entry whose shortName is NULL and matches a token against shortName or
 * longName. Per entry it reads chromDefault, minLumQuality, minChromQuality and mask.
 * NOTE(review): the initializer order is presumably
 * {shortName, longName, chromDefault, minLumQuality, minChromQuality, mask};
 * confirm against the struct PPFilter declaration, which is not in this file. */
122 static struct PPFilter filters[]=
124 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
125 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
126 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
127 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
128 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
129 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
130 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
131 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
132 {"dr", "dering", 1, 5, 6, DERING},
133 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
134 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
135 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
136 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
137 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
138 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
139 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
140 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
141 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
142 {NULL, NULL,0,0,0,0} //End Marker
/* Alias table stored as consecutive (name, expansion) string pairs.
 * pp_get_mode_by_name_and_quality() compares a filter name against the even-indexed
 * entries and, on a match, splices the following odd-indexed filter list into the
 * string being parsed. The scan stops at a NULL even-indexed entry.
 * NOTE(review): that NULL end marker is not visible in this listing. */
145 static const char *replaceTable[]=
147 "default", "hb:a,vb:a,dr:a",
148 "de", "hb:a,vb:a,dr:a",
149 "fast", "h1:a,v1:a,dr:a",
150 "fa", "h1:a,v1:a,dr:a",
151 "ac", "ha:a:128:7,va:a,dr:a",
/* x86-only cache-hint helpers. Each one emits the prefetch instruction it is named
 * after through inline asm, with operand 0 used as the memory address "(%0)".
 * NOTE(review): the operand constraint lines are not visible in this listing;
 * presumably operand 0 is the pointer p -- confirm. */
156 #if defined(ARCH_X86)
157 static inline void prefetchnta(void *p)
159 asm volatile( "prefetchnta (%0)\n\t"
164 static inline void prefetcht0(void *p)
166 asm volatile( "prefetcht0 (%0)\n\t"
171 static inline void prefetcht1(void *p)
173 asm volatile( "prefetcht1 (%0)\n\t"
178 static inline void prefetcht2(void *p)
180 asm volatile( "prefetcht2 (%0)\n\t"
186 /* The horizontal functions exist only in C because the MMX
187 * code is faster with vertical filters and transposing. */
190 * Check if the given 8x8 Block is mostly "flat"
/*
 * Flatness test along rows: counts, over BLOCK_SIZE iterations, how many of the seven
 * horizontally adjacent pixel pairs src[k], src[k+1] (k = 0..6) are nearly equal.
 * Returns non-zero when that count exceeds c->ppMode.flatnessThreshold.
 * NOTE(review): the declarations of numEq/y and the per-row pointer advance are not
 * visible in this listing; presumably src moves by stride each iteration.
 */
192 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
/* Tolerance for "equal" neighbours: nonBQP scaled by baseDcDiff/256, plus one. */
196 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
197 const int dcThreshold= dcOffset*2 + 1;
199 for(y=0; y<BLOCK_SIZE; y++){
/* (unsigned)(a - b + dcOffset) < dcThreshold is a single-comparison form of
 * |a - b| <= dcOffset: a negative sum wraps to a large unsigned value and fails. */
200 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
201 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
202 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
203 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
204 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
205 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
206 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
209 return numEq > c->ppMode.flatnessThreshold;
213 * Check if the middle 8x8 Block in the given 8x16 block is flat
/*
 * Flatness test along columns: after skipping four rows, counts over BLOCK_SIZE-1
 * iterations how many of the eight pixels src[0..7] are nearly equal to the pixel one
 * stride below. Returns non-zero when the count exceeds c->ppMode.flatnessThreshold.
 * NOTE(review): the declarations of numEq/y and the per-row pointer advance are not
 * visible in this listing.
 */
215 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
/* Tolerance for "equal" neighbours: nonBQP scaled by baseDcDiff/256, plus one. */
219 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
220 const int dcThreshold= dcOffset*2 + 1;
222 src+= stride*4; // src points to begin of the 8x8 Block
223 for(y=0; y<BLOCK_SIZE-1; y++){
/* Single-comparison range check: true exactly when |upper - lower| <= dcOffset. */
224 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
225 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
226 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
227 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
228 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
229 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
230 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
231 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
234 return numEq > c->ppMode.flatnessThreshold;
/*
 * Range check along a row: returns 0 as soon as a sampled pixel pair differs by more
 * than 2*QP. (unsigned)(a - b + 2*QP) > 4*QP is a single-comparison form of
 * |a - b| > 2*QP, assuming QP is not negative.
 * NOTE(review): the enclosing loop, any preprocessor branches between the two groups
 * of checks below, and the success return value are not visible in this listing.
 */
237 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
/* Sparse variant: four pairs that are five (or three) pixels apart. */
242 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
244 if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
246 if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
248 if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
/* Single-pair variant: first against last pixel of the eight. */
253 if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
/*
 * Range check along columns: returns 0 when sampled vertical pixel differences exceed
 * 2*QP. Three different formulations are visible below.
 * NOTE(review): how they are selected (presumably preprocessor branches), the
 * declarations of x/y/min/max and the success return value are not visible here.
 */
260 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
/* Formulation 1: for each group of four columns, test one row pair per column
 * (rows 0/5, 2/7, 4/1, 6/3) with the single-comparison |a - b| > 2*QP check. */
266 for(x=0; x<BLOCK_SIZE; x+=4){
267 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
268 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
269 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
270 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
/* Formulation 2: for every column, compare row 1 with row 8. */
275 for(x=0; x<BLOCK_SIZE; x++){
276 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
/* Formulation 3: per column, track the minimum and maximum pixel value and reject
 * when their spread exceeds 2*QP. */
283 for(x=0; x<BLOCK_SIZE; x++){
288 int v= src[x + y*stride];
292 if(max-min > 2*QP) return 0;
/*
 * Classify a block for horizontal filtering: first the flatness test isHorizDC_C(),
 * and for flat blocks additionally the range test isHorizMinMaxOk_C() with c->QP.
 * NOTE(review): the values returned for each outcome are not visible in this listing.
 */
298 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
300 if( isHorizDC_C(src, stride, c) ){
301 if( isHorizMinMaxOk_C(src, stride, c->QP) )
/*
 * Classify a block for vertical filtering: first the flatness test isVertDC_C(),
 * and for flat blocks additionally the range test isVertMinMaxOk_C() with c->QP.
 * NOTE(review): the values returned for each outcome are not visible in this listing.
 */
310 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
312 if( isVertDC_C(src, stride, c) ){
313 if( isVertMinMaxOk_C(src, stride, c->QP) )
/*
 * Default horizontal deblocking step for the edge between dst[3] and dst[4], repeated
 * for BLOCK_SIZE iterations.
 * NOTE(review): the clamping of d, the write-back to dst[] and the per-row pointer
 * advance are not visible in this listing.
 */
322 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
325 for(y=0; y<BLOCK_SIZE; y++){
/* Weighted difference across the edge, computed from dst[2..5]. */
326 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
/* Only edges whose measure is below 8*QP are processed. */
328 if(FFABS(middleEnergy) < 8*c->QP){
/* Half of the step across the edge. */
329 const int q=(dst[3] - dst[4])/2;
/* The same measure evaluated over dst[0..3] and over dst[4..7]. */
330 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
331 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
/* Amount by which the edge measure exceeds the smaller of its two neighbours. */
333 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
/* Give d the sign opposite to middleEnergy. */
337 d*= FFSIGN(-middleEnergy);
358 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
359 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
/*
 * Horizontal low-pass over dst[0..7], repeated for BLOCK_SIZE iterations; the pixels
 * dst[-1] and dst[8] are read as neighbours.
 * NOTE(review): the declaration of sums[]/y and the per-row pointer advance are not
 * visible in this listing.
 */
361 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
364 for(y=0; y<BLOCK_SIZE; y++){
/* Edge padding values: use the outside neighbour only if it differs from the border
 * pixel by less than QP, otherwise repeat the border pixel. */
365 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
366 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
/* Running sums: each sums[k] is derived from sums[k-1] by dropping one sample and
 * adding the next, with "first"/"last" standing in for samples beyond the block.
 * The +4 in sums[0] propagates to every sum; two sums are added per output, which
 * yields the +8 rounding term for the >>4 below. */
369 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
370 sums[1] = sums[0] - first + dst[3];
371 sums[2] = sums[1] - first + dst[4];
372 sums[3] = sums[2] - first + dst[5];
373 sums[4] = sums[3] - first + dst[6];
374 sums[5] = sums[4] - dst[0] + dst[7];
375 sums[6] = sums[5] - dst[1] + last;
376 sums[7] = sums[6] - dst[2] + last;
377 sums[8] = sums[7] - dst[3] + last;
378 sums[9] = sums[8] - dst[4] + last;
/* Output k combines sums[k], sums[k+2] and twice the original pixel, divided by 16. */
380 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
381 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
382 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
383 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
384 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
385 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
386 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
387 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
394 * Experimental Filter 1 (Horizontal)
395 * will not damage linear gradients
396 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
397 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
398 * MMX2 version does correct clipping C version does not
399 * not identical with the vertical one
401 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
404 static uint64_t *lut= NULL;
408 lut = av_malloc(256*8);
411 int v= i < 128 ? 2*i : 2*(i-256);
413 //Simulate 112242211 9-Tap filter
414 uint64_t a= (v/16) & 0xFF;
415 uint64_t b= (v/8) & 0xFF;
416 uint64_t c= (v/4) & 0xFF;
417 uint64_t d= (3*v/8) & 0xFF;
419 //Simulate piecewise linear interpolation
420 uint64_t a= (v/16) & 0xFF;
421 uint64_t b= (v*3/16) & 0xFF;
422 uint64_t c= (v*5/16) & 0xFF;
423 uint64_t d= (7*v/16) & 0xFF;
424 uint64_t A= (0x100 - a)&0xFF;
425 uint64_t B= (0x100 - b)&0xFF;
426 uint64_t C= (0x100 - c)&0xFF;
427 uint64_t D= (0x100 - c)&0xFF;
429 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
430 (D<<24) | (C<<16) | (B<<8) | (A);
431 //lut[i] = (v<<32) | (v<<24);
435 for(y=0; y<BLOCK_SIZE; y++){
436 int a= src[1] - src[2];
437 int b= src[3] - src[4];
438 int c= src[5] - src[6];
440 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
443 int v = d * FFSIGN(-b);
457 * accurate deblock filter
/*
 * "Accurate" deblocking filter working on ten samples src[-1*step] .. src[8*step]
 * (after src has been advanced by 4*step). step is the distance between successive
 * samples of one filtered line.
 * A line whose count of nearly-equal neighbours exceeds flatnessThreshold is handled
 * by the min/max + low-pass code; the default filter code follows further below.
 * NOTE(review): the loop over lines (presumably advancing by stride), the local QP
 * declaration, the declarations of numEq/min/max/x/sums and the exact conditions
 * joining the sections are not visible in this listing.
 */
459 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
/* Tolerance for "equal" neighbours: nonBQP scaled by baseDcDiff/256, plus one. */
462 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
463 const int dcThreshold= dcOffset*2 + 1;
465 src+= step*4; // src points to begin of the 8x8 Block
/* Count the neighbouring pairs with |difference| <= dcOffset (single-comparison
 * range check via the unsigned cast). */
469 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
470 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
471 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
472 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
473 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
474 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
475 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
476 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
477 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
478 if(numEq > c->ppMode.flatnessThreshold){
/* Min/max search taking samples in pairs: order each pair first, then compare the
 * larger one only against max and the smaller one only against min. */
481 if(src[0] > src[step]){
489 if(src[x*step] > src[(x+1)*step]){
490 if(src[x *step] > max) max= src[ x *step];
491 if(src[(x+1)*step] < min) min= src[(x+1)*step];
493 if(src[(x+1)*step] > max) max= src[(x+1)*step];
494 if(src[ x *step] < min) min= src[ x *step];
/* Edge padding values: use the outside neighbour only if it differs from the border
 * sample by less than QP, otherwise repeat the border sample. */
498 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
499 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
/* Running sums, same construction as in doHorizLowPass_C but addressed via step. */
502 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
503 sums[1] = sums[0] - first + src[3*step];
504 sums[2] = sums[1] - first + src[4*step];
505 sums[3] = sums[2] - first + src[5*step];
506 sums[4] = sums[3] - first + src[6*step];
507 sums[5] = sums[4] - src[0*step] + src[7*step];
508 sums[6] = sums[5] - src[1*step] + last;
509 sums[7] = sums[6] - src[2*step] + last;
510 sums[8] = sums[7] - src[3*step] + last;
511 sums[9] = sums[8] - src[4*step] + last;
/* Output k combines sums[k], sums[k+2] and twice the original sample, divided by 16. */
513 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
514 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
515 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
516 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
517 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
518 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
519 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
520 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
/* Default filter: same energy computation as doHorizDefFilter_C, addressed via step. */
523 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
525 if(FFABS(middleEnergy) < 8*QP){
526 const int q=(src[3*step] - src[4*step])/2;
527 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
528 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
/* Amount by which the edge measure exceeds the smaller of its two neighbours. */
530 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
/* Give d the sign opposite to middleEnergy. */
534 d*= FFSIGN(-middleEnergy);
558 //Note: there are C, MMX, MMX2 and 3DNOW versions; there is no combined 3DNOW+MMX2 one
/* Decide which implementation variants get compiled. With RUNTIME_CPUDETECT every
 * condition below is true, so all variants guarded here are built; otherwise the
 * HAVE_* macros decide.
 * NOTE(review): most of the "#define COMPILE_*" and "#endif" lines are not visible in
 * this listing. */
560 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
565 #define COMPILE_ALTIVEC
566 #endif //HAVE_ALTIVEC
568 #if defined(ARCH_X86)
570 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
574 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
578 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
579 #define COMPILE_3DNOW
581 #endif /* defined(ARCH_X86) */
/* The template is included once per variant. RENAME() appends the variant suffix to
 * every templated identifier, giving e.g. postProcess_C and postProcess_MMX2. */
592 #define RENAME(a) a ## _C
593 #include "postprocess_template.c"
596 #ifdef COMPILE_ALTIVEC
599 #define RENAME(a) a ## _altivec
600 #include "postprocess_altivec_template.c"
601 #include "postprocess_template.c"
610 #define RENAME(a) a ## _MMX
611 #include "postprocess_template.c"
620 #define RENAME(a) a ## _MMX2
621 #include "postprocess_template.c"
630 #define RENAME(a) a ## _3DNow
631 #include "postprocess_template.c"
634 // minor note: the HAVE_xyz is messed up after that line so do not use it.
/*
 * Process one plane: copies *vm into c->ppMode and forwards all arguments to one of
 * the postProcess_<variant>() functions generated from the template.
 * With RUNTIME_CPUDETECT the variant is picked from c->cpuCaps, otherwise from the
 * HAVE_* macros at compile time.
 * NOTE(review): several "#else"/"#endif"/"else" lines are not visible in this listing.
 */
636 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
637 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
639 PPContext *c= (PPContext *)vc;
640 PPMode *ppMode= (PPMode *)vm;
641 c->ppMode= *ppMode; //FIXME
643 // Using ifs here as they are faster than function pointers although the
644 // difference would not be measurable here but it is much better because
645 // someone might exchange the CPU without restarting MPlayer ;)
646 #ifdef RUNTIME_CPUDETECT
647 #if defined(ARCH_X86)
648 // ordered per speed fastest first
649 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
650 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
651 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
652 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
653 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
654 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
656 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
/* Runtime detection on non-x86 builds: AltiVec when the capability bit is set. */
659 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
660 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
663 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
/* Compile-time selection: exactly one call is compiled in. */
665 #else //RUNTIME_CPUDETECT
667 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
668 #elif defined (HAVE_3DNOW)
669 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
670 #elif defined (HAVE_MMX)
671 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
672 #elif defined (HAVE_ALTIVEC)
673 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
675 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
677 #endif //!RUNTIME_CPUDETECT
680 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
681 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
683 /* -pp Command line Help
/* User-facing help text for the postprocessing option string. The declaration type
 * depends on the library version: a pointer to const char for versions below 52,
 * a char array otherwise. */
685 #if LIBPOSTPROC_VERSION_INT < (52<<16)
686 const char *const pp_help=
688 const char pp_help[] =
690 "Available postprocessing filters:\n"
692 "short long name short long option Description\n"
693 "* * a autoq CPU power dependent enabler\n"
694 " c chrom chrominance filtering enabled\n"
695 " y nochrom chrominance filtering disabled\n"
696 " n noluma luma filtering disabled\n"
697 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
698 " 1. difference factor: default=32, higher -> more deblocking\n"
699 " 2. flatness threshold: default=39, lower -> more deblocking\n"
700 " the h & v deblocking filters share these\n"
701 " so you can't set different thresholds for h / v\n"
702 "vb vdeblock (2 threshold) vertical deblocking filter\n"
/* NOTE(review): the filter table in this file registers the long names "ahdeblock" and
 * "avdeblock"; the two help lines below spell them "hadeblock" and "vadeblock", which
 * the parser would not accept -- confirm which spelling is intended. */
703 "ha hadeblock (2 threshold) horizontal deblocking filter\n"
704 "va vadeblock (2 threshold) vertical deblocking filter\n"
705 "h1 x1hdeblock experimental h deblock filter 1\n"
706 "v1 x1vdeblock experimental v deblock filter 1\n"
707 "dr dering deringing filter\n"
708 "al autolevels automatic brightness / contrast\n"
709 " f fullyrange stretch luminance to (0..255)\n"
710 "lb linblenddeint linear blend deinterlacer\n"
711 "li linipoldeint linear interpolating deinterlace\n"
712 "ci cubicipoldeint cubic interpolating deinterlacer\n"
713 "md mediandeint median deinterlacer\n"
714 "fd ffmpegdeint ffmpeg deinterlacer\n"
715 "l5 lowpass5 FIR lowpass deinterlacer\n"
716 "de default hb:a,vb:a,dr:a\n"
717 "fa fast h1:a,v1:a,dr:a\n"
718 "ac ha:a:128:7,va:a,dr:a\n"
719 "tn tmpnoise (3 threshold) temporal noise reducer\n"
720 " 1. <= 2. <= 3. larger -> stronger filtering\n"
/* NOTE(review): the filter table registers "forcequant" (all lower case) and names are
 * matched with strcmp(), so "forceQuant" as printed below would not match. */
721 "fq forceQuant <quantizer> force quantizer\n"
723 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
724 "long form example:\n"
725 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
726 "short form example:\n"
727 "vb:a/hb:a/lb de,-vb\n"
733 pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
735 char temp[GET_MODE_BUFFER_SIZE];
737 static const char filterDelimiters[] = ",/";
738 static const char optionDelimiters[] = ":";
739 struct PPMode *ppMode;
742 ppMode= av_malloc(sizeof(PPMode));
745 ppMode->chromMode= 0;
746 ppMode->maxTmpNoise[0]= 700;
747 ppMode->maxTmpNoise[1]= 1500;
748 ppMode->maxTmpNoise[2]= 3000;
749 ppMode->maxAllowedY= 234;
750 ppMode->minAllowedY= 16;
751 ppMode->baseDcDiff= 256/8;
752 ppMode->flatnessThreshold= 56-16-1;
753 ppMode->maxClippedThreshold= 0.01;
756 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
758 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
762 int q= 1000000; //PP_QUALITY_MAX;
766 char *options[OPTIONS_ARRAY_SIZE];
769 int numOfUnknownOptions=0;
770 int enable=1; //does the user want us to enabled or disabled the filter
772 filterToken= strtok(p, filterDelimiters);
773 if(filterToken == NULL) break;
774 p+= strlen(filterToken) + 1; // p points to next filterToken
775 filterName= strtok(filterToken, optionDelimiters);
776 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
778 if(*filterName == '-'){
783 for(;;){ //for all options
784 option= strtok(NULL, optionDelimiters);
785 if(option == NULL) break;
787 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
788 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
789 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
790 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
791 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
793 options[numOfUnknownOptions] = option;
794 numOfUnknownOptions++;
796 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
798 options[numOfUnknownOptions] = NULL;
800 /* replace stuff from the replace Table */
801 for(i=0; replaceTable[2*i]!=NULL; i++){
802 if(!strcmp(replaceTable[2*i], filterName)){
803 int newlen= strlen(replaceTable[2*i + 1]);
807 if(p==NULL) p= temp, *p=0; //last filter
808 else p--, *p=','; //not last filter
811 spaceLeft= p - temp + plen;
812 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE){
816 memmove(p + newlen, p, plen+1);
817 memcpy(p, replaceTable[2*i + 1], newlen);
822 for(i=0; filters[i].shortName!=NULL; i++){
823 if( !strcmp(filters[i].longName, filterName)
824 || !strcmp(filters[i].shortName, filterName)){
825 ppMode->lumMode &= ~filters[i].mask;
826 ppMode->chromMode &= ~filters[i].mask;
829 if(!enable) break; // user wants to disable it
831 if(q >= filters[i].minLumQuality && luma)
832 ppMode->lumMode|= filters[i].mask;
833 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
834 if(q >= filters[i].minChromQuality)
835 ppMode->chromMode|= filters[i].mask;
837 if(filters[i].mask == LEVEL_FIX){
839 ppMode->minAllowedY= 16;
840 ppMode->maxAllowedY= 234;
841 for(o=0; options[o]!=NULL; o++){
842 if( !strcmp(options[o],"fullyrange")
843 ||!strcmp(options[o],"f")){
844 ppMode->minAllowedY= 0;
845 ppMode->maxAllowedY= 255;
846 numOfUnknownOptions--;
850 else if(filters[i].mask == TEMP_NOISE_FILTER)
855 for(o=0; options[o]!=NULL; o++){
857 ppMode->maxTmpNoise[numOfNoises]=
858 strtol(options[o], &tail, 0);
859 if(tail!=options[o]){
861 numOfUnknownOptions--;
862 if(numOfNoises >= 3) break;
866 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
867 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
870 for(o=0; options[o]!=NULL && o<2; o++){
872 int val= strtol(options[o], &tail, 0);
873 if(tail==options[o]) break;
875 numOfUnknownOptions--;
876 if(o==0) ppMode->baseDcDiff= val;
877 else ppMode->flatnessThreshold= val;
880 else if(filters[i].mask == FORCE_QUANT){
882 ppMode->forcedQuant= 15;
884 for(o=0; options[o]!=NULL && o<1; o++){
886 int val= strtol(options[o], &tail, 0);
887 if(tail==options[o]) break;
889 numOfUnknownOptions--;
890 ppMode->forcedQuant= val;
895 if(!filterNameOk) ppMode->error++;
896 ppMode->error += numOfUnknownOptions;
899 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
901 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
/* Release a mode object; presumably one returned by pp_get_mode_by_name_and_quality().
 * NOTE(review): the function body is not visible in this listing. */
908 void pp_free_mode(pp_mode_t *mode){
/* Point *p at a fresh zero-filled allocation of size bytes. Previous contents are not
 * carried over, despite the "realloc" in the name. The alignment argument is not used
 * in the visible lines.
 * NOTE(review): whether the previous *p is released first is not visible in this
 * listing; the allocation result is not checked here. */
912 static void reallocAlign(void **p, int alignment, int size){
914 *p= av_mallocz(size);
/*
 * (Re)allocate every work buffer of the context for the given picture size and
 * strides, using reallocAlign() (buffers come back zero-filled).
 * NOTE(review): the loop headers around the yHistogram and tempBlurred statements and
 * the assignment of c->stride are not visible in this listing.
 */
917 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
/* Picture size in 16-pixel units, rounded up. */
918 int mbWidth = (width+15)>>4;
919 int mbHeight= (height+15)>>4;
923 c->qpStride= qpStride;
/* Two scratch areas of 24 lines each, and a fixed 2*16*8-byte block buffer. */
925 reallocAlign((void **)&c->tempDst, 8, stride*24);
926 reallocAlign((void **)&c->tempSrc, 8, stride*24);
927 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
/* 256-bin histogram of 64-bit counters, each bin seeded with the same
 * picture-size dependent start value. */
928 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
930 c->yHistogram[i]= width*height/64*15/256;
933 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
934 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
935 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
938 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
/* QP tables: two full tables of qpStride*mbHeight entries and one single row of
 * mbWidth entries for the forced quantizer. */
939 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
940 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
941 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
/* Name callback referenced by the AVClass below.
 * NOTE(review): the returned string is not visible in this listing. */
944 static const char * context_to_name(void * ptr) {
/* Logging class assigned to PPContext.av_class in pp_get_context(): initialized with
 * the string "Postproc", the callback above, and a NULL third member. */
948 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
950 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
951 PPContext *c= av_malloc(sizeof(PPContext));
952 int stride= (width+15)&(~15); //assumed / will realloc if needed
953 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
955 memset(c, 0, sizeof(PPContext));
956 c->av_class = &av_codec_context_class;
958 if(cpuCaps&PP_FORMAT){
959 c->hChromaSubSample= cpuCaps&0x3;
960 c->vChromaSubSample= (cpuCaps>>4)&0x3;
962 c->hChromaSubSample= 1;
963 c->vChromaSubSample= 1;
966 reallocBuffers(c, width, height, stride, qpStride);
973 void pp_free_context(void *vc){
974 PPContext *c = (PPContext*)vc;
977 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
978 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
980 av_free(c->tempBlocks);
981 av_free(c->yHistogram);
984 av_free(c->deintTemp);
985 av_free(c->stdQPTable);
986 av_free(c->nonBQPTable);
987 av_free(c->forcedQPTable);
989 memset(c, 0, sizeof(PPContext));
/*
 * Postprocess one picture: prepare the quantizer tables, run the luma plane through
 * postProcess(), then either postprocess or plainly copy the two chroma planes.
 * src/dst and srcStride/dstStride hold three entries each (index 0 is used for the
 * first postProcess() call, 1 and 2 for the chroma calls/copies).
 * NOTE(review): several "else"/"if" lines and all closing braces are not visible in
 * this listing.
 */
994 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
995 uint8_t * dst[3], const int dstStride[3],
996 int width, int height,
997 const QP_STORE_T *QP_store, int QPStride,
998 pp_mode_t *vm, void *vc, int pict_type)
/* Picture size in 16-pixel units, rounded up. */
1000 int mbWidth = (width+15)>>4;
1001 int mbHeight= (height+15)>>4;
1002 PPMode *mode = (PPMode*)vm;
1003 PPContext *c = (PPContext*)vc;
1004 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1005 int absQPStride = FFABS(QPStride);
1007 // c->stride and c->QPStride are always positive
/* Grow the work buffers if this picture needs larger strides than the context has. */
1008 if(c->stride < minStride || c->qpStride < absQPStride)
1009 reallocBuffers(c, width, height,
1010 FFMAX(minStride, c->stride),
1011 FFMAX(c->qpStride, absQPStride));
/* No QP table supplied, or a forced quantizer requested: use the single-row
 * forcedQPTable with stride 0 so the same row serves every macroblock row. */
1013 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1015 QP_store= c->forcedQPTable;
1016 absQPStride = QPStride = 0;
1017 if(mode->lumMode & FORCE_QUANT)
1018 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1020 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
/* PP_PICT_TYPE_QP2: halve every QP value into stdQPTable. The first loop handles four
 * entries per 32-bit word; the 0x7F7F7F7F mask removes the bit shifted in from the
 * neighbouring byte. The second loop handles the remaining entries one by one.
 * NOTE(review): QP_store is read here as a flat array of mbHeight*absQPStride entries
 * starting at QP_store[0]; for a negative QPStride that would not match the row-wise
 * indexing used further below -- confirm. The uint32_t casts presumably assume
 * one-byte QP_STORE_T entries and suitably aligned tables -- confirm. */
1023 if(pict_type & PP_PICT_TYPE_QP2){
1025 const int count= mbHeight * absQPStride;
1026 for(i=0; i<(count>>2); i++){
1027 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1029 for(i<<=2; i<count; i++){
1030 c->stdQPTable[i] = QP_store[i]>>1;
1032 QP_store= c->stdQPTable;
1033 QPStride= absQPStride;
/* Prints the QP table, one macroblock row per line.
 * NOTE(review): the condition guarding this dump is not visible in this listing. */
1038 for(y=0; y<mbHeight; y++){
1039 for(x=0; x<mbWidth; x++){
1040 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1042 av_log(c, AV_LOG_INFO, "\n");
1044 av_log(c, AV_LOG_INFO, "\n");
/* For pictures whose type (low three bits of pict_type) is not 3, store the QP values
 * masked to 6 bits in nonBQPTable. Two copies follow: a flat word-wise one, and a
 * row-by-row one that reads with QPStride and writes with absQPStride.
 * NOTE(review): the condition choosing between the two is not visible here. */
1047 if((pict_type&7)!=3){
1050 const int count= mbHeight * QPStride;
1051 for(i=0; i<(count>>2); i++){
1052 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1054 for(i<<=2; i<count; i++){
1055 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1059 for(i=0; i<mbHeight; i++) {
1060 for(j=0; j<absQPStride; j++) {
1061 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1067 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1068 mode->lumMode, mode->chromMode);
/* First plane (isColor argument 0). */
1070 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1071 width, height, QP_store, QPStride, 0, mode, c);
/* Remaining planes use the dimensions reduced by the context's subsampling shifts. */
1073 width = (width )>>c->hChromaSubSample;
1074 height = (height)>>c->vChromaSubSample;
1076 if(mode->chromMode){
1077 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1078 width, height, QP_store, QPStride, 1, mode, c);
1079 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1080 width, height, QP_store, QPStride, 2, mode, c);
/* No chroma filtering requested: copy the planes unchanged. With equal source and
 * destination strides linecpy() is used, otherwise each row is copied separately. */
1082 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1083 linecpy(dst[1], src[1], height, srcStride[1]);
1084 linecpy(dst[2], src[2], height, srcStride[2]);
1087 for(y=0; y<height; y++){
1088 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1089 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);