2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of Libav.
8 * Libav is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * Libav is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with Libav; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less selfinvented filters so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once;
66 the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use git log
77 #include "libavutil/avutil.h"
83 //#define HAVE_AMD3DNOW
86 //#define DEBUG_BRIGHTNESS
87 #include "postprocess.h"
88 #include "postprocess_internal.h"
89 #include "libavutil/avstring.h"
/* Report the library version: returns the LIBPOSTPROC_VERSION_INT constant. */
91 unsigned postproc_version(void)
93 return LIBPOSTPROC_VERSION_INT;
/* Return the LIBAV_CONFIGURATION string constant. */
96 const char *postproc_configuration(void)
98 return LIBAV_CONFIGURATION;
/*
 * Return the license string.
 * The return expression concatenates LICENSE_PREFIX with LIBAV_LICENSE
 * (presumably a string literal, as adjacent-literal concatenation requires;
 * confirm) and then advances the pointer by sizeof(LICENSE_PREFIX) - 1,
 * i.e. past the prefix characters, so the caller receives a pointer to the
 * license text that follows the prefix.
 */
101 const char *postproc_license(void)
103 #define LICENSE_PREFIX "libpostproc license: "
104 return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1;
/* Size in bytes of the scratch copy of the mode string made by pp_get_mode_by_name_and_quality(). */
111 #define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter array of unrecognized option tokens; the parser keeps one slot for the NULL terminator. */
112 #define OPTIONS_ARRAY_SIZE 10
/* NOTE(review): no use of TEMP_STRIDE is visible in this part of the file. */
114 #define TEMP_STRIDE 8
115 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
/*
 * 64-bit constants declared through DECLARE_ASM_CONST.
 * wNN repeats the 16-bit value 0x00NN in all four words; bNN repeats the
 * byte 0xNN in all eight bytes.
 * NOTE(review): the leading 8 is presumably the requested alignment --
 * confirm against the DECLARE_ASM_CONST definition.
 */
118 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
119 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
120 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
121 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
122 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
123 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
124 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
125 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
/* NOTE(review): by its name a threshold for the dering filter; its use is not visible in this part of the file. */
128 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
/*
 * Table of the filters known to the mode-string parser.
 * pp_get_mode_by_name_and_quality() walks it until shortName is NULL and
 * compares each filter token against longName and shortName.
 * NOTE(review): the initializer order is presumably
 * {shortName, longName, chromDefault, minLumQuality, minChromQuality, mask};
 * confirm against the struct PPFilter declaration.
 */
131 static struct PPFilter filters[]=
133 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
134 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
135 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
136 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
137 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
138 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
139 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
140 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
141 {"dr", "dering", 1, 5, 6, DERING},
142 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
143 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
144 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
145 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
146 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
147 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
148 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
149 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
150 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
151 {NULL, NULL,0,0,0,0} //End Marker
/*
 * Alias table consumed in pairs: entry 2*i is an alias name and entry
 * 2*i+1 is the filter string substituted for it by
 * pp_get_mode_by_name_and_quality(). The lookup loop stops at a NULL name
 * (the terminating entries are not among the lines shown here).
 */
154 static const char *replaceTable[]=
156 "default", "hb:a,vb:a,dr:a",
157 "de", "hb:a,vb:a,dr:a",
158 "fast", "h1:a,v1:a,dr:a",
159 "fa", "h1:a,v1:a,dr:a",
160 "ac", "ha:a:128:7,va:a,dr:a",
/* Emit a prefetchnta instruction for the address p (the asm operand list is not among the lines shown). */
166 static inline void prefetchnta(void *p)
168 __asm__ volatile( "prefetchnta (%0)\n\t"
/* Emit a prefetcht0 instruction for the address p (the asm operand list is not among the lines shown). */
173 static inline void prefetcht0(void *p)
175 __asm__ volatile( "prefetcht0 (%0)\n\t"
/* Emit a prefetcht1 instruction for the address p (the asm operand list is not among the lines shown). */
180 static inline void prefetcht1(void *p)
182 __asm__ volatile( "prefetcht1 (%0)\n\t"
/* Emit a prefetcht2 instruction for the address p (the asm operand list is not among the lines shown). */
187 static inline void prefetcht2(void *p)
189 __asm__ volatile( "prefetcht2 (%0)\n\t"
195 /* The horizontal functions exist only in C because the MMX
196 * code is faster with vertical filters and transposing. */
199 * Check if the given 8x8 Block is mostly "flat"
/*
 * Flatness test on horizontally adjacent pixels.
 * For each of the seven neighbouring pairs src[k], src[k+1] (k = 0..6) the
 * unsigned comparison is true exactly when
 * -dcOffset <= src[k] - src[k+1] <= dcOffset; every such pair increments
 * numEq. Returns non-zero when numEq exceeds c->ppMode.flatnessThreshold.
 * NOTE(review): the loop runs BLOCK_SIZE times; the statement that advances
 * src to the next row (presumably by stride) is not among the lines shown.
 */
201 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
/* Tolerance derived from the non-B-frame QP and the configured base difference. */
205 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
/* Width of the accepted window [-dcOffset, dcOffset], used with the unsigned-compare trick below. */
206 const int dcThreshold= dcOffset*2 + 1;
208 for(y=0; y<BLOCK_SIZE; y++){
209 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
210 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
211 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
212 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
213 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
214 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
215 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
218 return numEq > c->ppMode.flatnessThreshold;
222 * Check if the middle 8x8 Block in the given 8x16 block is flat
/*
 * Flatness test on vertically adjacent pixels.
 * After skipping four rows, each of the eight columns is compared with the
 * pixel one stride below it; a pair counts towards numEq when the
 * difference lies in [-dcOffset, dcOffset]. Returns non-zero when numEq
 * exceeds c->ppMode.flatnessThreshold.
 * NOTE(review): the loop runs BLOCK_SIZE-1 times; the statement that
 * advances src to the next row is not among the lines shown.
 */
224 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
/* Same tolerance computation as in isHorizDC_C. */
228 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
229 const int dcThreshold= dcOffset*2 + 1;
231 src+= stride*4; // src points to begin of the 8x8 Block
232 for(y=0; y<BLOCK_SIZE-1; y++){
233 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
234 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
235 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
236 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
237 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
238 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
239 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
240 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
243 return numEq > c->ppMode.flatnessThreshold;
/*
 * Reject blocks whose sampled pixel pairs within a row differ too much.
 * Each test `(unsigned)(a - b + 2*QP) > 4*QP` is true when a - b lies
 * outside [-2*QP, 2*QP]; the function returns 0 as soon as one pair fails.
 * NOTE(review): loop headers, row stepping and the success return are not
 * among the lines shown, and the src[0]/src[7] test may belong to an
 * alternative, conditionally compiled variant -- confirm.
 */
246 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
251 if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
253 if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
255 if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
257 if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
262 if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
/*
 * Vertical counterpart of the min/max check: returns 0 when sampled pixels
 * of a column differ by more than 2*QP.
 * NOTE(review): three different loop bodies are visible (a sparse check
 * stepping four columns at a time, a row-1 versus row-8 check, and an
 * explicit min/max scan). They are presumably alternatives selected by
 * preprocessor conditionals that are not among the lines shown -- confirm.
 */
269 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
/* Sparse variant: one row pair per column, four columns per iteration. */
275 for(x=0; x<BLOCK_SIZE; x+=4){
276 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
277 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
278 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
279 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
/* Variant comparing row 1 with row 8 in every column. */
284 for(x=0; x<BLOCK_SIZE; x++){
285 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
/* Variant tracking the minimum and maximum of each column. */
292 for(x=0; x<BLOCK_SIZE; x++){
297 int v= src[x + y*stride];
301 if(max-min > 2*QP) return 0;
/*
 * Classify a block for horizontal filtering: first the flatness test
 * (isHorizDC_C), then, for flat blocks, the range test
 * (isHorizMinMaxOk_C with c->QP).
 * NOTE(review): the values returned for each outcome are not among the
 * lines shown.
 */
307 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
309 if( isHorizDC_C(src, stride, c) ){
310 if( isHorizMinMaxOk_C(src, stride, c->QP) )
/*
 * Classify a block for vertical filtering: first the flatness test
 * (isVertDC_C), then, for flat blocks, the range test
 * (isVertMinMaxOk_C with c->QP).
 * NOTE(review): the values returned for each outcome are not among the
 * lines shown.
 */
319 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
321 if( isVertDC_C(src, stride, c) ){
322 if( isVertMinMaxOk_C(src, stride, c->QP) )
/*
 * Default horizontal deblocking filter, applied per row around the
 * dst[3] / dst[4] boundary.
 * NOTE(review): the clipping of d, the write-back to dst[3]/dst[4] and the
 * row advance are not among the lines shown.
 */
331 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
334 for(y=0; y<BLOCK_SIZE; y++){
/* Weighted difference across the boundary between dst[3] and dst[4]. */
335 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
/* Only steps smaller than 8*QP are treated as filterable. */
337 if(FFABS(middleEnergy) < 8*c->QP){
/* Half of the pixel step across the boundary. */
338 const int q=(dst[3] - dst[4])/2;
/* The same weighted difference evaluated one pair to the left and to the right. */
339 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
340 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
/* Amount by which the boundary energy exceeds the smaller neighbouring energy. */
342 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
/* Give the correction the sign opposite to middleEnergy. */
346 d*= FFSIGN(-middleEnergy);
367 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
368 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
/*
 * Horizontal low-pass over eight pixels per row, reading dst[-1]..dst[8].
 * sums[k] is a running seven-sample window sum (plus the rounding term 4)
 * in which `first` / `last` stand in for samples left of dst[0] / right of
 * dst[7]. Each output combines two windows and twice the centre pixel and
 * is divided by 16 through the right shift.
 * NOTE(review): the declaration of sums and the row advance are not among
 * the lines shown.
 */
370 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
373 for(y=0; y<BLOCK_SIZE; y++){
/* Use the outside neighbour as padding only if it is within QP of the edge pixel. */
374 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
375 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
/* Slide the window: drop the oldest sample, add the next one. */
378 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
379 sums[1] = sums[0] - first + dst[3];
380 sums[2] = sums[1] - first + dst[4];
381 sums[3] = sums[2] - first + dst[5];
382 sums[4] = sums[3] - first + dst[6];
383 sums[5] = sums[4] - dst[0] + dst[7];
384 sums[6] = sums[5] - dst[1] + last;
385 sums[7] = sums[6] - dst[2] + last;
386 sums[8] = sums[7] - dst[3] + last;
387 sums[9] = sums[8] - dst[4] + last;
/* All sums were taken from the unmodified row, so in-place stores are safe here. */
389 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
390 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
391 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
392 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
393 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
394 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
395 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
396 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
403 * Experimental Filter 1 (Horizontal)
404 * will not damage linear gradients
405 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
406 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
407 * MMX2 version does correct clipping, C version does not
408 * not identical with the vertical one
/*
 * Experimental horizontal deblocking filter, C version.
 * The first part fills a lazily allocated 256-entry table of packed 64-bit
 * correction values; the second part derives a per-row correction v from
 * three pixel differences.
 * NOTE(review): the guard around the table allocation, the use of QP and
 * the pixel updates are not among the lines shown. No check of the
 * av_malloc() result is visible either.
 */
410 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
/* Static table pointer: shared by all calls. */
413 static uint64_t *lut= NULL;
417 lut = av_malloc(256*8);
/* Map the table index to a signed, doubled value in [-256, 254]. */
420 int v= i < 128 ? 2*i : 2*(i-256);
/*
 * NOTE(review): two coefficient sets follow and both declare a, b, c, d.
 * Presumably the first set is meant to be disabled (comment delimiters may
 * be missing from this listing) -- confirm.
 */
422 //Simulate 112242211 9-Tap filter
423 uint64_t a= (v/16) & 0xFF;
424 uint64_t b= (v/8) & 0xFF;
425 uint64_t c= (v/4) & 0xFF;
426 uint64_t d= (3*v/8) & 0xFF;
428 //Simulate piecewise linear interpolation
429 uint64_t a= (v/16) & 0xFF;
430 uint64_t b= (v*3/16) & 0xFF;
431 uint64_t c= (v*5/16) & 0xFF;
432 uint64_t d= (7*v/16) & 0xFF;
/* Upper-case values are the 8-bit negations of the lower-case ones. */
433 uint64_t A= (0x100 - a)&0xFF;
434 uint64_t B= (0x100 - b)&0xFF;
435 uint64_t C= (0x100 - c)&0xFF;
/* NOTE(review): D is computed from c, not d, unlike A/B/C -- looks like a typo; confirm before changing. */
436 uint64_t D= (0x100 - c)&0xFF;
/* Pack the eight byte-sized corrections into one 64-bit table entry. */
438 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
439 (D<<24) | (C<<16) | (B<<8) | (A);
440 //lut[i] = (v<<32) | (v<<24);
444 for(y=0; y<BLOCK_SIZE; y++){
/* Differences of three neighbouring pixel pairs; b is the middle one. */
445 int a= src[1] - src[2];
446 int b= src[3] - src[4];
447 int c= src[5] - src[6];
/* Excess of the middle step over the average of the outer steps, floored at 0. */
449 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
/* Correction magnitude d with the sign opposite to b. */
452 int v = d * FFSIGN(-b);
466 * accurate deblock filter
/*
 * "Accurate" deblocking filter working along an arbitrary direction:
 * `step` is the distance between consecutive samples of one line.
 * Per line it counts near-equal neighbouring pairs; lines judged flat get
 * the low-pass treatment, the others the default filter.
 * NOTE(review): the outer loop, the use of `stride`, the declarations of
 * QP, min, max and sums, and the final write-back of the default filter
 * are not among the lines shown.
 */
468 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
/* Same tolerance computation as in isHorizDC_C / isVertDC_C. */
471 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
472 const int dcThreshold= dcOffset*2 + 1;
474 src+= step*4; // src points to begin of the 8x8 Block
/* Count the nine neighbouring pairs from sample -1 to sample 8 that differ by at most dcOffset. */
478 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
479 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
480 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
481 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
482 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
483 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
484 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
485 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
486 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
/* Flat line: determine the value range, then low-pass. */
487 if(numEq > c->ppMode.flatnessThreshold){
490 if(src[0] > src[step]){
/* Pairwise min/max update: one comparison orders the pair, then each element is tested once. */
498 if(src[x*step] > src[(x+1)*step]){
499 if(src[x *step] > max) max= src[ x *step];
500 if(src[(x+1)*step] < min) min= src[(x+1)*step];
502 if(src[(x+1)*step] > max) max= src[(x+1)*step];
503 if(src[ x *step] < min) min= src[ x *step];
/* Edge padding: use the outside neighbour only if it is within QP of the edge sample. */
507 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
508 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
/* Running seven-sample window sums (plus rounding term 4), as in doHorizLowPass_C. */
511 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
512 sums[1] = sums[0] - first + src[3*step];
513 sums[2] = sums[1] - first + src[4*step];
514 sums[3] = sums[2] - first + src[5*step];
515 sums[4] = sums[3] - first + src[6*step];
516 sums[5] = sums[4] - src[0*step] + src[7*step];
517 sums[6] = sums[5] - src[1*step] + last;
518 sums[7] = sums[6] - src[2*step] + last;
519 sums[8] = sums[7] - src[3*step] + last;
520 sums[9] = sums[8] - src[4*step] + last;
/* Two windows plus twice the centre sample, divided by 16. */
522 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
523 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
524 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
525 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
526 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
527 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
528 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
529 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
/* Non-flat line: default filter around the boundary between samples 3 and 4, as in doHorizDefFilter_C. */
532 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
534 if(FFABS(middleEnergy) < 8*QP){
535 const int q=(src[3*step] - src[4*step])/2;
536 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
537 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
539 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
543 d*= FFSIGN(-middleEnergy);
567 //Note: we have C, MMX, MMX2 and 3DNow versions; there is no 3DNow+MMX2 one
/*
 * Decide which variants get compiled. CONFIG_RUNTIME_CPUDETECT makes each
 * of the conditions below true, so all guarded variants are built.
 * NOTE(review): several #define / #if / #endif lines of this section are
 * not among the lines shown.
 */
569 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
574 #define COMPILE_ALTIVEC
575 #endif //HAVE_ALTIVEC
579 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
583 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
587 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
588 #define COMPILE_3DNOW
590 #endif /* ARCH_X86 */
/*
 * The template source is included once per variant. Before each inclusion
 * the HAVE_* macros are redefined and RENAME() is set so that every symbol
 * of the template receives a variant-specific suffix.
 */
597 #define HAVE_AMD3DNOW 0
599 #define HAVE_ALTIVEC 0
/* Plain C variant: symbols get the suffix _C. */
602 #define RENAME(a) a ## _C
603 #include "postprocess_template.c"
/* AltiVec variant: suffix _altivec, with an additional AltiVec-specific template. */
606 #ifdef COMPILE_ALTIVEC
609 #define HAVE_ALTIVEC 1
610 #define RENAME(a) a ## _altivec
611 #include "postprocess_altivec_template.c"
612 #include "postprocess_template.c"
/* MMX variant: suffix _MMX. */
620 #define RENAME(a) a ## _MMX
621 #include "postprocess_template.c"
/* MMX2 variant: suffix _MMX2. */
631 #define RENAME(a) a ## _MMX2
632 #include "postprocess_template.c"
/* 3DNow variant: suffix _3DNow. */
643 #define HAVE_AMD3DNOW 1
644 #define RENAME(a) a ## _3DNow
645 #include "postprocess_template.c"
648 // minor note: the HAVE_xyz is messed up after that line so do not use it.
/*
 * Dispatch one plane to the postProcess_<variant>() implementation.
 * The mode is copied into the context first. With
 * CONFIG_RUNTIME_CPUDETECT the variant is chosen from c->cpuCaps at run
 * time, otherwise at compile time.
 * NOTE(review): the preprocessor conditionals and `else` lines that select
 * between the calls below are not among the lines shown.
 */
650 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
651 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
653 PPContext *c= (PPContext *)vc;
654 PPMode *ppMode= (PPMode *)vm;
655 c->ppMode= *ppMode; //FIXME
657 // Using ifs here as they are faster than function pointers although the
658 // difference would not be measurable here but it is much better because
659 // someone might exchange the CPU without restarting MPlayer ;)
660 #if CONFIG_RUNTIME_CPUDETECT
662 // ordered per speed fastest first
663 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
664 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
665 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
666 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
667 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
668 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
670 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
673 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
674 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
677 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
679 #else //CONFIG_RUNTIME_CPUDETECT
/* Compile-time selection: presumably exactly one of these calls survives preprocessing -- confirm. */
681 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
683 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
685 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
687 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
689 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
691 #endif //!CONFIG_RUNTIME_CPUDETECT
694 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
695 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
697 /* -pp Command line Help
/*
 * Help text describing the filter names, their options and the syntax of
 * the mode string parsed by pp_get_mode_by_name_and_quality().
 * NOTE(review): the long names printed here for "ha"/"va" (hadeblock,
 * vadeblock) differ from the long names in the filters table (ahdeblock,
 * avdeblock), and "forceQuant" differs in case from the table's
 * "forcequant". The parser compares with strcmp(), so the spellings shown
 * in this text would not match -- confirm which side is intended.
 */
699 const char pp_help[] =
700 "Available postprocessing filters:\n"
702 "short long name short long option Description\n"
703 "* * a autoq CPU power dependent enabler\n"
704 " c chrom chrominance filtering enabled\n"
705 " y nochrom chrominance filtering disabled\n"
706 " n noluma luma filtering disabled\n"
707 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
708 " 1. difference factor: default=32, higher -> more deblocking\n"
709 " 2. flatness threshold: default=39, lower -> more deblocking\n"
710 " the h & v deblocking filters share these\n"
711 " so you can't set different thresholds for h / v\n"
712 "vb vdeblock (2 threshold) vertical deblocking filter\n"
713 "ha hadeblock (2 threshold) horizontal deblocking filter\n"
714 "va vadeblock (2 threshold) vertical deblocking filter\n"
715 "h1 x1hdeblock experimental h deblock filter 1\n"
716 "v1 x1vdeblock experimental v deblock filter 1\n"
717 "dr dering deringing filter\n"
718 "al autolevels automatic brightness / contrast\n"
719 " f fullyrange stretch luminance to (0..255)\n"
720 "lb linblenddeint linear blend deinterlacer\n"
721 "li linipoldeint linear interpolating deinterlace\n"
722 "ci cubicipoldeint cubic interpolating deinterlacer\n"
723 "md mediandeint median deinterlacer\n"
724 "fd ffmpegdeint ffmpeg deinterlacer\n"
725 "l5 lowpass5 FIR lowpass deinterlacer\n"
726 "de default hb:a,vb:a,dr:a\n"
727 "fa fast h1:a,v1:a,dr:a\n"
728 "ac ha:a:128:7,va:a,dr:a\n"
729 "tn tmpnoise (3 threshold) temporal noise reducer\n"
730 " 1. <= 2. <= 3. larger -> stronger filtering\n"
731 "fq forceQuant <quantizer> force quantizer\n"
733 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
734 "long form example:\n"
735 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
736 "short form example:\n"
737 "vb:a/hb:a/lb de,-vb\n"
/*
 * Build a PPMode from a textual filter description.
 *
 * name    - mode string: filters separated by ',' or '/', options
 *           separated by ':'; a filter name starting with '-' disables it.
 * quality - quality level that the "autoq"/"a" option applies to a filter
 *           in place of the default of 1000000.
 *
 * The string is copied into a local buffer of GET_MODE_BUFFER_SIZE bytes
 * and tokenized in place. Unknown filter names and unconsumed options are
 * counted in ppMode->error.
 * NOTE(review): tokenizing uses strtok(), which keeps static state, so this
 * function is not reentrant. No check of the av_malloc() result and no
 * return statement are among the lines shown -- confirm the error paths.
 */
743 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
745 char temp[GET_MODE_BUFFER_SIZE];
747 static const char filterDelimiters[] = ",/";
748 static const char optionDelimiters[] = ":";
749 struct PPMode *ppMode;
752 ppMode= av_malloc(sizeof(PPMode));
/* Defaults that apply when the mode string does not override them. */
755 ppMode->chromMode= 0;
756 ppMode->maxTmpNoise[0]= 700;
757 ppMode->maxTmpNoise[1]= 1500;
758 ppMode->maxTmpNoise[2]= 3000;
759 ppMode->maxAllowedY= 234;
760 ppMode->minAllowedY= 16;
761 ppMode->baseDcDiff= 256/8;
762 ppMode->flatnessThreshold= 56-16-1;
763 ppMode->maxClippedThreshold= 0.01;
/* Work on a bounded private copy because tokenizing and alias expansion modify the text. */
766 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE);
768 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
772 int q= 1000000; //PP_QUALITY_MAX;
776 char *options[OPTIONS_ARRAY_SIZE];
779 int numOfUnknownOptions=0;
780 int enable=1; //does the user want us to enable or disable the filter
782 filterToken= strtok(p, filterDelimiters);
783 if(filterToken == NULL) break;
784 p+= strlen(filterToken) + 1; // p points to next filterToken
/* Second strtok pass splits the filter token into its name and options. */
785 filterName= strtok(filterToken, optionDelimiters);
786 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
788 if(*filterName == '-'){
793 for(;;){ //for all options
794 option= strtok(NULL, optionDelimiters);
795 if(option == NULL) break;
797 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
/* Generic options are handled here; anything else is stored for the filter-specific code below. */
798 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
799 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
800 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
801 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
803 options[numOfUnknownOptions] = option;
804 numOfUnknownOptions++;
/* Stop collecting while one slot is still free for the NULL terminator. */
806 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
808 options[numOfUnknownOptions] = NULL;
810 /* replace stuff from the replace Table */
811 for(i=0; replaceTable[2*i]!=NULL; i++){
812 if(!strcmp(replaceTable[2*i], filterName)){
813 int newlen= strlen(replaceTable[2*i + 1]);
817 if(p==NULL) p= temp, *p=0; //last filter
818 else p--, *p=','; //not last filter
/* Make sure the expansion still fits into temp before shifting the remaining text. */
821 spaceLeft= p - temp + plen;
822 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE){
826 memmove(p + newlen, p, plen+1);
827 memcpy(p, replaceTable[2*i + 1], newlen);
/* Look the name up in the filter table (long or short form). */
832 for(i=0; filters[i].shortName!=NULL; i++){
833 if( !strcmp(filters[i].longName, filterName)
834 || !strcmp(filters[i].shortName, filterName)){
/* Clear the filter's bits first; they are set again only if it is enabled and the quality suffices. */
835 ppMode->lumMode &= ~filters[i].mask;
836 ppMode->chromMode &= ~filters[i].mask;
839 if(!enable) break; // user wants to disable it
841 if(q >= filters[i].minLumQuality && luma)
842 ppMode->lumMode|= filters[i].mask;
843 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
844 if(q >= filters[i].minChromQuality)
845 ppMode->chromMode|= filters[i].mask;
/* Filter-specific options: every option consumed below decrements numOfUnknownOptions. */
847 if(filters[i].mask == LEVEL_FIX){
849 ppMode->minAllowedY= 16;
850 ppMode->maxAllowedY= 234;
851 for(o=0; options[o]!=NULL; o++){
852 if( !strcmp(options[o],"fullyrange")
853 ||!strcmp(options[o],"f")){
854 ppMode->minAllowedY= 0;
855 ppMode->maxAllowedY= 255;
856 numOfUnknownOptions--;
860 else if(filters[i].mask == TEMP_NOISE_FILTER)
/* Up to three numeric thresholds are read into maxTmpNoise[]. */
865 for(o=0; options[o]!=NULL; o++){
867 ppMode->maxTmpNoise[numOfNoises]=
868 strtol(options[o], &tail, 0);
869 if(tail!=options[o]){
871 numOfUnknownOptions--;
872 if(numOfNoises >= 3) break;
876 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
877 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
/* First number sets baseDcDiff, second sets flatnessThreshold. */
880 for(o=0; options[o]!=NULL && o<2; o++){
882 int val= strtol(options[o], &tail, 0);
883 if(tail==options[o]) break;
885 numOfUnknownOptions--;
886 if(o==0) ppMode->baseDcDiff= val;
887 else ppMode->flatnessThreshold= val;
890 else if(filters[i].mask == FORCE_QUANT){
/* Quantizer defaults to 15 unless a number is given. */
892 ppMode->forcedQuant= 15;
894 for(o=0; options[o]!=NULL && o<1; o++){
896 int val= strtol(options[o], &tail, 0);
897 if(tail==options[o]) break;
899 numOfUnknownOptions--;
900 ppMode->forcedQuant= val;
/* Unknown filter names and leftover options both count as errors. */
905 if(!filterNameOk) ppMode->error++;
906 ppMode->error += numOfUnknownOptions;
909 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
911 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
/* Release a mode object. NOTE(review): the function body is not among the lines shown; presumably it frees `mode` -- confirm. */
918 void pp_free_mode(pp_mode *mode){
/*
 * Point *p at a fresh zero-initialized buffer of `size` bytes.
 * NOTE(review): `alignment` is not used in the lines shown. Whether the
 * previous *p is released first cannot be seen here -- confirm. The
 * av_mallocz() result is stored without a check.
 */
922 static void reallocAlign(void **p, int alignment, int size){
924 *p= av_mallocz(size);
/*
 * (Re)allocate the work buffers of a context for the given picture size
 * and strides, using reallocAlign() for each buffer.
 * NOTE(review): the loop headers around the yHistogram and tempBlurred
 * statements and the store of `stride` into the context are not among the
 * lines shown. No allocation result is checked in the visible lines.
 */
927 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
/* Picture size in 16x16 macroblocks, rounded up. */
928 int mbWidth = (width+15)>>4;
929 int mbHeight= (height+15)>>4;
933 c->qpStride= qpStride;
935 reallocAlign((void **)&c->tempDst, 8, stride*24);
936 reallocAlign((void **)&c->tempSrc, 8, stride*24);
937 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
938 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
/* Initial histogram value derived from the picture area. */
940 c->yHistogram[i]= width*height/64*15/256;
943 //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
944 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
945 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
948 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
/* QP tables: two full tables of qpStride*mbHeight entries and one row of mbWidth entries for forced QPs. */
949 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
950 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
951 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
/* Name callback stored in av_codec_context_class. NOTE(review): the body (the returned string) is not among the lines shown. */
954 static const char * context_to_name(void * ptr) {
/* AVClass assigned to c->av_class in pp_get_context(): class name "Postproc" and the context_to_name callback. */
958 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
/*
 * Allocate and initialize a postprocessing context for pictures of
 * width x height.
 *
 * cpuCaps - capability flags. When PP_FORMAT is set, the low bits also
 *           carry the chroma subsampling shifts; otherwise both shifts
 *           default to 1.
 *
 * NOTE(review): the av_malloc() result is passed to memset() without a
 * check in the lines shown, so an allocation failure would dereference
 * NULL. The storing of cpuCaps and the return statement are not among the
 * lines shown either.
 */
960 pp_context *pp_get_context(int width, int height, int cpuCaps){
961 PPContext *c= av_malloc(sizeof(PPContext));
962 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
963 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
965 memset(c, 0, sizeof(PPContext));
966 c->av_class = &av_codec_context_class;
/* Horizontal shift from bits 0-1, vertical shift from bits 4-5 of cpuCaps. */
968 if(cpuCaps&PP_FORMAT){
969 c->hChromaSubSample= cpuCaps&0x3;
970 c->vChromaSubSample= (cpuCaps>>4)&0x3;
972 c->hChromaSubSample= 1;
973 c->vChromaSubSample= 1;
976 reallocBuffers(c, width, height, stride, qpStride);
/*
 * Release the buffers owned by a context and clear the context structure.
 * NOTE(review): some av_free() calls (e.g. for tempDst / tempSrc) and the
 * release of the context itself are not among the lines shown -- confirm.
 */
983 void pp_free_context(void *vc){
984 PPContext *c = (PPContext*)vc;
987 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
988 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
990 av_free(c->tempBlocks);
991 av_free(c->yHistogram);
994 av_free(c->deintTemp);
995 av_free(c->stdQPTable);
996 av_free(c->nonBQPTable);
997 av_free(c->forcedQPTable);
/* Wipe the structure so that no stale pointers remain in it. */
999 memset(c, 0, sizeof(PPContext));
/*
 * Postprocess one picture consisting of three planes.
 *
 * src / srcStride - input planes and their strides
 * dst / dstStride - output planes and their strides
 * width, height   - size of plane 0; the other planes are shifted right by
 *                   the chroma subsampling stored in the context
 * QP_store        - per-macroblock quantizers, may be NULL
 * QPStride        - stride of QP_store
 * vm, vc          - mode and context (cast to PPMode / PPContext)
 * pict_type       - picture type bits, optionally with PP_PICT_TYPE_QP2
 *
 * NOTE(review): several declarations, `else` lines and closing braces are
 * not among the lines shown.
 */
1004 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
1005 uint8_t * dst[3], const int dstStride[3],
1006 int width, int height,
1007 const QP_STORE_T *QP_store, int QPStride,
1008 pp_mode *vm, void *vc, int pict_type)
1010 int mbWidth = (width+15)>>4;
1011 int mbHeight= (height+15)>>4;
1012 PPMode *mode = (PPMode*)vm;
1013 PPContext *c = (PPContext*)vc;
1014 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1015 int absQPStride = FFABS(QPStride);
1017 // c->stride and c->qpStride are always positive
/* Grow the context buffers if this call needs larger strides than allocated so far. */
1018 if(c->stride < minStride || c->qpStride < absQPStride)
1019 reallocBuffers(c, width, height,
1020 FFMAX(minStride, c->stride),
1021 FFMAX(c->qpStride, absQPStride));
/* Without a QP table, or with a forced quantizer, use one constant row and stride 0 so every macroblock row reuses it. */
1023 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1025 QP_store= c->forcedQPTable;
1026 absQPStride = QPStride = 0;
1027 if(mode->lumMode & FORCE_QUANT)
1028 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1030 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
/* PP_PICT_TYPE_QP2: halve every QP into stdQPTable, four entries per 32-bit word, then the remainder one by one. */
/* NOTE(review): the word-wise access presumably relies on QP_STORE_T being one byte wide and on suitably aligned tables -- confirm. */
1033 if(pict_type & PP_PICT_TYPE_QP2){
1035 const int count= mbHeight * absQPStride;
1036 for(i=0; i<(count>>2); i++){
1037 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1039 for(i<<=2; i<count; i++){
1040 c->stdQPTable[i] = QP_store[i]>>1;
1042 QP_store= c->stdQPTable;
1043 QPStride= absQPStride;
/* Dump of the QP table. NOTE(review): presumably compiled only for debugging; the guard is not among the lines shown. */
1048 for(y=0; y<mbHeight; y++){
1049 for(x=0; x<mbWidth; x++){
1050 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1052 av_log(c, AV_LOG_INFO, "\n");
1054 av_log(c, AV_LOG_INFO, "\n");
/* Unless the low three bits of pict_type equal 3, store the QPs masked to six bits in nonBQPTable. */
1057 if((pict_type&7)!=3){
/* Contiguous copy, word-wise then byte-wise. NOTE(review): the condition choosing between this and the per-row copy below is not among the lines shown. */
1060 const int count= mbHeight * QPStride;
1061 for(i=0; i<(count>>2); i++){
1062 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1064 for(i<<=2; i<count; i++){
1065 c->nonBQPTable[i] = QP_store[i] & 0x3F;
/* Per-row copy: the destination uses absQPStride, the source uses QPStride. */
1069 for(i=0; i<mbHeight; i++) {
1070 for(j=0; j<absQPStride; j++) {
1071 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1077 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1078 mode->lumMode, mode->chromMode);
/* Plane 0 is processed with isColor = 0. */
1080 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1081 width, height, QP_store, QPStride, 0, mode, c);
/* Remaining planes use the subsampled size. */
1083 width = (width )>>c->hChromaSubSample;
1084 height = (height)>>c->vChromaSubSample;
/* Filter planes 1 and 2 only if a chroma mode is set; otherwise copy them unchanged. */
1086 if(mode->chromMode){
1087 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1088 width, height, QP_store, QPStride, 1, mode, c);
1089 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1090 width, height, QP_store, QPStride, 2, mode, c);
/* Equal strides: copy each plane with one linecpy() call. */
1092 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1093 linecpy(dst[1], src[1], height, srcStride[1]);
1094 linecpy(dst[2], src[2], height, srcStride[2]);
/* Different strides: copy row by row, `width` bytes per row. */
1097 for(y=0; y<height; y++){
1098 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1099 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);