libpostproc/postprocess.c

   1 /*
   2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
   3  *
   4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
   5  *
   6  * This file is part of Libav.
   7  *
   8  * Libav is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * Libav is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with Libav; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * postprocessing.
  26  */
  27
  28 /*
  29                         C       MMX     MMX2    3DNow   AltiVec
  30 isVertDC                Ec      Ec                      Ec
  31 isVertMinMaxOk          Ec      Ec                      Ec
  32 doVertLowPass           E               e       e       Ec
  33 doVertDefFilter         Ec      Ec      e       e       Ec
  34 isHorizDC               Ec      Ec                      Ec
  35 isHorizMinMaxOk         a       E                       Ec
  36 doHorizLowPass          E               e       e       Ec
  37 doHorizDefFilter        Ec      Ec      e       e       Ec
  38 do_a_deblock            Ec      E       Ec      E
  39 deRing                  E               e       e*      Ecp
  40 Vertical RKAlgo1        E               a       a
  41 Horizontal RKAlgo1                      a       a
  42 Vertical X1#            a               E       E
  43 Horizontal X1#          a               E       E
  44 LinIpolDeinterlace      e               E       E*
  45 CubicIpolDeinterlace    a               e       e*
  46 LinBlendDeinterlace     e               E       E*
  47 MedianDeinterlace#      E       Ec      Ec
  48 TempDeNoiser#           E               e       e       Ec
  49
  50 * I do not have a 3DNow! CPU, so this is untested; nobody has reported it broken, so it is assumed to work
  51 # more or less self-invented filters, so exactness is not very meaningful
  52 E = Exact implementation
  53 e = almost exact implementation (slightly different rounding,...)
  54 a = alternative / approximate impl
  55 c = checked against the other implementations (-vo md5)
  56 p = partially optimized, still some work to do
  57 */
  58
  59 /*
  60 TODO:
  61 reduce the time wasted on the mem transfer
  62 unroll stuff if instructions depend too much on the prior one
  63 move YScale thing to the end instead of fixing QP
  64 write a faster and higher quality deblocking filter :)
  65 make the mainloop more flexible (variable number of blocks at once);
  66         the if/else stuff per block is slowing things down
  67 compare the quality & speed of all filters
  68 split this huge file
  69 optimize c versions
  70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
  71 ...
  72 */
  73
  74 //Changelog: use git log
  75
  76 #include "config.h"
  77 #include "libavutil/avutil.h"
  78 #include <inttypes.h>
  79 #include <stdio.h>
  80 #include <stdlib.h>
  81 #include <string.h>
  82 //#undef HAVE_MMX2
  83 //#define HAVE_AMD3DNOW
  84 //#undef HAVE_MMX
  85 //#undef ARCH_X86
  86 //#define DEBUG_BRIGHTNESS
  87 #include "postprocess.h"
  88 #include "postprocess_internal.h"
  89 #include "libavutil/avstring.h"
  90
  91 unsigned postproc_version(void)
  92 {
  93     return LIBPOSTPROC_VERSION_INT;
  94 }
  95
  96 const char *postproc_configuration(void)
  97 {
  98     return LIBAV_CONFIGURATION;
  99 }
 100
 101 const char *postproc_license(void)
 102 {
 103 #define LICENSE_PREFIX "libpostproc license: "
 104     return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1;
 105 }
 106
 107 #if HAVE_ALTIVEC_H
 108 #include <altivec.h>
 109 #endif
 110
 111 #define GET_MODE_BUFFER_SIZE 500
 112 #define OPTIONS_ARRAY_SIZE 10
 113 #define BLOCK_SIZE 8
 114 #define TEMP_STRIDE 8
 115 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
 116
 117 #if ARCH_X86
 118 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
 119 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
 120 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
 121 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
 122 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
 123 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
 124 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
 125 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
 126 #endif
 127
 128 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
 129
 130
 131 static struct PPFilter filters[]=
 132 {
 133     {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
 134     {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
 135 /*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
 136     {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
 137     {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
 138     {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
 139     {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
 140     {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
 141     {"dr", "dering",                1, 5, 6, DERING},
 142     {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
 143     {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
 144     {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
 145     {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
 146     {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
 147     {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
 148     {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
 149     {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
 150     {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
 151     {NULL, NULL,0,0,0,0} //End Marker
 152 };
 153
 154 static const char *replaceTable[]=
 155 {
 156     "default",      "hb:a,vb:a,dr:a",
 157     "de",           "hb:a,vb:a,dr:a",
 158     "fast",         "h1:a,v1:a,dr:a",
 159     "fa",           "h1:a,v1:a,dr:a",
 160     "ac",           "ha:a:128:7,va:a,dr:a",
 161     NULL //End Marker
 162 };
 163
 164
 165 #if ARCH_X86
 166 static inline void prefetchnta(void *p)
 167 {
 168     __asm__ volatile(   "prefetchnta (%0)\n\t"
 169         : : "r" (p)
 170     );
 171 }
 172
 173 static inline void prefetcht0(void *p)
 174 {
 175     __asm__ volatile(   "prefetcht0 (%0)\n\t"
 176         : : "r" (p)
 177     );
 178 }
 179
 180 static inline void prefetcht1(void *p)
 181 {
 182     __asm__ volatile(   "prefetcht1 (%0)\n\t"
 183         : : "r" (p)
 184     );
 185 }
 186
 187 static inline void prefetcht2(void *p)
 188 {
 189     __asm__ volatile(   "prefetcht2 (%0)\n\t"
 190         : : "r" (p)
 191     );
 192 }
 193 #endif
 194
 195 /* The horizontal functions exist only in C because the MMX
 196  * code is faster with vertical filters and transposing. */
 197
 198 /**
 199  * Check if the given 8x8 Block is mostly "flat"
 200  */
 201 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
 202 {
 203     int numEq= 0;
 204     int y;
 205     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 206     const int dcThreshold= dcOffset*2 + 1;
 207
 208     for(y=0; y<BLOCK_SIZE; y++){
 209         if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
 210         if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
 211         if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
 212         if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
 213         if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
 214         if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
 215         if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
 216         src+= stride;
 217     }
 218     return numEq > c->ppMode.flatnessThreshold;
 219 }
 220
 221 /**
 222  * Check if the middle 8x8 Block in the given 8x16 block is flat
 223  */
 224 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
 225 {
 226     int numEq= 0;
 227     int y;
 228     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 229     const int dcThreshold= dcOffset*2 + 1;
 230
 231     src+= stride*4; // src points to begin of the 8x8 Block
 232     for(y=0; y<BLOCK_SIZE-1; y++){
 233         if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
 234         if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
 235         if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
 236         if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
 237         if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
 238         if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
 239         if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
 240         if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
 241         src+= stride;
 242     }
 243     return numEq > c->ppMode.flatnessThreshold;
 244 }
 245
 246 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
 247 {
 248     int i;
 249 #if 1
 250     for(i=0; i<2; i++){
 251         if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
 252         src += stride;
 253         if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
 254         src += stride;
 255         if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
 256         src += stride;
 257         if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
 258         src += stride;
 259     }
 260 #else
 261     for(i=0; i<8; i++){
 262         if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
 263         src += stride;
 264     }
 265 #endif
 266     return 1;
 267 }
 268
 269 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
 270 {
 271 #if 1
 272 #if 1
 273     int x;
 274     src+= stride*4;
 275     for(x=0; x<BLOCK_SIZE; x+=4){
 276         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
 277         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
 278         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
 279         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
 280     }
 281 #else
 282     int x;
 283     src+= stride*3;
 284     for(x=0; x<BLOCK_SIZE; x++){
 285         if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
 286     }
 287 #endif
 288     return 1;
 289 #else
 290     int x;
 291     src+= stride*4;
 292     for(x=0; x<BLOCK_SIZE; x++){
 293         int min=255;
 294         int max=0;
 295         int y;
 296         for(y=0; y<8; y++){
 297             int v= src[x + y*stride];
 298             if(v>max) max=v;
 299             if(v<min) min=v;
 300         }
 301         if(max-min > 2*QP) return 0;
 302     }
 303     return 1;
 304 #endif
 305 }
 306
 307 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
 308 {
 309     if( isHorizDC_C(src, stride, c) ){
 310         if( isHorizMinMaxOk_C(src, stride, c->QP) )
 311             return 1;
 312         else
 313             return 0;
 314     }else{
 315         return 2;
 316     }
 317 }
 318
 319 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
 320 {
 321     if( isVertDC_C(src, stride, c) ){
 322         if( isVertMinMaxOk_C(src, stride, c->QP) )
 323             return 1;
 324         else
 325             return 0;
 326     }else{
 327         return 2;
 328     }
 329 }
 330
 331 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
 332 {
 333     int y;
 334     for(y=0; y<BLOCK_SIZE; y++){
 335         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
 336
 337         if(FFABS(middleEnergy) < 8*c->QP){
 338             const int q=(dst[3] - dst[4])/2;
 339             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
 340             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
 341
 342             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 343             d= FFMAX(d, 0);
 344
 345             d= (5*d + 32) >> 6;
 346             d*= FFSIGN(-middleEnergy);
 347
 348             if(q>0)
 349             {
 350                 d= d<0 ? 0 : d;
 351                 d= d>q ? q : d;
 352             }
 353             else
 354             {
 355                 d= d>0 ? 0 : d;
 356                 d= d<q ? q : d;
 357             }
 358
 359             dst[3]-= d;
 360             dst[4]+= d;
 361         }
 362         dst+= stride;
 363     }
 364 }
 365
 366 /**
 367  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 368  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 369  */
 370 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
 371 {
 372     int y;
 373     for(y=0; y<BLOCK_SIZE; y++){
 374         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
 375         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
 376
 377         int sums[10];
 378         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
 379         sums[1] = sums[0] - first  + dst[3];
 380         sums[2] = sums[1] - first  + dst[4];
 381         sums[3] = sums[2] - first  + dst[5];
 382         sums[4] = sums[3] - first  + dst[6];
 383         sums[5] = sums[4] - dst[0] + dst[7];
 384         sums[6] = sums[5] - dst[1] + last;
 385         sums[7] = sums[6] - dst[2] + last;
 386         sums[8] = sums[7] - dst[3] + last;
 387         sums[9] = sums[8] - dst[4] + last;
 388
 389         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
 390         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
 391         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
 392         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
 393         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
 394         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
 395         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
 396         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
 397
 398         dst+= stride;
 399     }
 400 }
 401
 402 /**
 403  * Experimental Filter 1 (Horizontal)
 404  * will not damage linear gradients
 405  * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
 406  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
 407  * MMX2 version does correct clipping C version does not
 408  * not identical with the vertical one
 409  */
 410 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
 411 {
 412     int y;
 413     static uint64_t *lut= NULL;
 414     if(lut==NULL)
 415     {
 416         int i;
 417         lut = av_malloc(256*8);
 418         for(i=0; i<256; i++)
 419         {
 420             int v= i < 128 ? 2*i : 2*(i-256);
 421 /*
 422 //Simulate 112242211 9-Tap filter
 423             uint64_t a= (v/16)  & 0xFF;
 424             uint64_t b= (v/8)   & 0xFF;
 425             uint64_t c= (v/4)   & 0xFF;
 426             uint64_t d= (3*v/8) & 0xFF;
 427 */
 428 //Simulate piecewise linear interpolation
 429             uint64_t a= (v/16)   & 0xFF;
 430             uint64_t b= (v*3/16) & 0xFF;
 431             uint64_t c= (v*5/16) & 0xFF;
 432             uint64_t d= (7*v/16) & 0xFF;
 433             uint64_t A= (0x100 - a)&0xFF;
 434             uint64_t B= (0x100 - b)&0xFF;
 435             uint64_t C= (0x100 - c)&0xFF;
 436             uint64_t D= (0x100 - c)&0xFF;
 437
 438             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
 439                        (D<<24) | (C<<16) | (B<<8)  | (A);
 440             //lut[i] = (v<<32) | (v<<24);
 441         }
 442     }
 443
 444     for(y=0; y<BLOCK_SIZE; y++){
 445         int a= src[1] - src[2];
 446         int b= src[3] - src[4];
 447         int c= src[5] - src[6];
 448
 449         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
 450
 451         if(d < QP){
 452             int v = d * FFSIGN(-b);
 453
 454             src[1] +=v/8;
 455             src[2] +=v/4;
 456             src[3] +=3*v/8;
 457             src[4] -=3*v/8;
 458             src[5] -=v/4;
 459             src[6] -=v/8;
 460         }
 461         src+=stride;
 462     }
 463 }
 464
 465 /**
 466  * accurate deblock filter
 467  */
 468 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
 469     int y;
 470     const int QP= c->QP;
 471     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 472     const int dcThreshold= dcOffset*2 + 1;
 473 //START_TIMER
 474     src+= step*4; // src points to begin of the 8x8 Block
 475     for(y=0; y<8; y++){
 476         int numEq= 0;
 477
 478         if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
 479         if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
 480         if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
 481         if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
 482         if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
 483         if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
 484         if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
 485         if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
 486         if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
 487         if(numEq > c->ppMode.flatnessThreshold){
 488             int min, max, x;
 489
 490             if(src[0] > src[step]){
 491                 max= src[0];
 492                 min= src[step];
 493             }else{
 494                 max= src[step];
 495                 min= src[0];
 496             }
 497             for(x=2; x<8; x+=2){
 498                 if(src[x*step] > src[(x+1)*step]){
 499                         if(src[x    *step] > max) max= src[ x   *step];
 500                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
 501                 }else{
 502                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
 503                         if(src[ x   *step] < min) min= src[ x   *step];
 504                 }
 505             }
 506             if(max-min < 2*QP){
 507                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
 508                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
 509
 510                 int sums[10];
 511                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
 512                 sums[1] = sums[0] - first       + src[3*step];
 513                 sums[2] = sums[1] - first       + src[4*step];
 514                 sums[3] = sums[2] - first       + src[5*step];
 515                 sums[4] = sums[3] - first       + src[6*step];
 516                 sums[5] = sums[4] - src[0*step] + src[7*step];
 517                 sums[6] = sums[5] - src[1*step] + last;
 518                 sums[7] = sums[6] - src[2*step] + last;
 519                 sums[8] = sums[7] - src[3*step] + last;
 520                 sums[9] = sums[8] - src[4*step] + last;
 521
 522                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
 523                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
 524                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
 525                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
 526                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
 527                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
 528                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
 529                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
 530             }
 531         }else{
 532             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
 533
 534             if(FFABS(middleEnergy) < 8*QP){
 535                 const int q=(src[3*step] - src[4*step])/2;
 536                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
 537                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
 538
 539                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 540                 d= FFMAX(d, 0);
 541
 542                 d= (5*d + 32) >> 6;
 543                 d*= FFSIGN(-middleEnergy);
 544
 545                 if(q>0){
 546                     d= d<0 ? 0 : d;
 547                     d= d>q ? q : d;
 548                 }else{
 549                     d= d>0 ? 0 : d;
 550                     d= d<q ? q : d;
 551                 }
 552
 553                 src[3*step]-= d;
 554                 src[4*step]+= d;
 555             }
 556         }
 557
 558         src += stride;
 559     }
 560 /*if(step==16){
 561     STOP_TIMER("step16")
 562 }else{
 563     STOP_TIMER("stepX")
 564 }*/
 565 }
 566
 567 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
 568 //Plain C versions
 569 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
 570 #define COMPILE_C
 571 #endif
 572
 573 #if HAVE_ALTIVEC
 574 #define COMPILE_ALTIVEC
 575 #endif //HAVE_ALTIVEC
 576
 577 #if ARCH_X86
 578
 579 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
 580 #define COMPILE_MMX
 581 #endif
 582
 583 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
 584 #define COMPILE_MMX2
 585 #endif
 586
 587 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
 588 #define COMPILE_3DNOW
 589 #endif
 590 #endif /* ARCH_X86 */
 591
 592 #undef HAVE_MMX
 593 #define HAVE_MMX 0
 594 #undef HAVE_MMX2
 595 #define HAVE_MMX2 0
 596 #undef HAVE_AMD3DNOW
 597 #define HAVE_AMD3DNOW 0
 598 #undef HAVE_ALTIVEC
 599 #define HAVE_ALTIVEC 0
 600
 601 #ifdef COMPILE_C
 602 #define RENAME(a) a ## _C
 603 #include "postprocess_template.c"
 604 #endif
 605
 606 #ifdef COMPILE_ALTIVEC
 607 #undef RENAME
 608 #undef HAVE_ALTIVEC
 609 #define HAVE_ALTIVEC 1
 610 #define RENAME(a) a ## _altivec
 611 #include "postprocess_altivec_template.c"
 612 #include "postprocess_template.c"
 613 #endif
 614
 615 //MMX versions
 616 #ifdef COMPILE_MMX
 617 #undef RENAME
 618 #undef HAVE_MMX
 619 #define HAVE_MMX 1
 620 #define RENAME(a) a ## _MMX
 621 #include "postprocess_template.c"
 622 #endif
 623
 624 //MMX2 versions
 625 #ifdef COMPILE_MMX2
 626 #undef RENAME
 627 #undef HAVE_MMX
 628 #undef HAVE_MMX2
 629 #define HAVE_MMX 1
 630 #define HAVE_MMX2 1
 631 #define RENAME(a) a ## _MMX2
 632 #include "postprocess_template.c"
 633 #endif
 634
 635 //3DNOW versions
 636 #ifdef COMPILE_3DNOW
 637 #undef RENAME
 638 #undef HAVE_MMX
 639 #undef HAVE_MMX2
 640 #undef HAVE_AMD3DNOW
 641 #define HAVE_MMX 1
 642 #define HAVE_MMX2 0
 643 #define HAVE_AMD3DNOW 1
 644 #define RENAME(a) a ## _3DNow
 645 #include "postprocess_template.c"
 646 #endif
 647
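  648 // Note: the HAVE_xyz macros were redefined by the template instantiations above and no longer describe the build configuration, so do not rely on them in new code below this line.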
 649
 650 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 651         const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
 652 {
 653     PPContext *c= (PPContext *)vc;
 654     PPMode *ppMode= (PPMode *)vm;
 655     c->ppMode= *ppMode; //FIXME
 656
 657     // Using ifs here as they are faster than function pointers although the
 658     // difference would not be measurable here but it is much better because
 659     // someone might exchange the CPU whithout restarting MPlayer ;)
 660 #if CONFIG_RUNTIME_CPUDETECT
 661 #if ARCH_X86
 662     // ordered per speed fastest first
 663     if(c->cpuCaps & PP_CPU_CAPS_MMX2)
 664         postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 665     else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
 666         postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 667     else if(c->cpuCaps & PP_CPU_CAPS_MMX)
 668         postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 669     else
 670         postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 671 #else
 672 #if HAVE_ALTIVEC
 673     if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
 674             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 675     else
 676 #endif
 677             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 678 #endif
 679 #else //CONFIG_RUNTIME_CPUDETECT
 680 #if   HAVE_MMX2
 681             postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 682 #elif HAVE_AMD3DNOW
 683             postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 684 #elif HAVE_MMX
 685             postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 686 #elif HAVE_ALTIVEC
 687             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 688 #else
 689             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 690 #endif
 691 #endif //!CONFIG_RUNTIME_CPUDETECT
 692 }
 693
 694 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 695 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
 696
 697 /* -pp Command line Help
 698 */
 699 const char pp_help[] =
 700 "Available postprocessing filters:\n"
 701 "Filters                        Options\n"
 702 "short  long name       short   long option     Description\n"
 703 "*      *               a       autoq           CPU power dependent enabler\n"
 704 "                       c       chrom           chrominance filtering enabled\n"
 705 "                       y       nochrom         chrominance filtering disabled\n"
 706 "                       n       noluma          luma filtering disabled\n"
 707 "hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
 708 "       1. difference factor: default=32, higher -> more deblocking\n"
 709 "       2. flatness threshold: default=39, lower -> more deblocking\n"
 710 "                       the h & v deblocking filters share these\n"
 711 "                       so you can't set different thresholds for h / v\n"
 712 "vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
 713 "ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
 714 "va     vadeblock       (2 threshold)           vertical deblocking filter\n"
 715 "h1     x1hdeblock                              experimental h deblock filter 1\n"
 716 "v1     x1vdeblock                              experimental v deblock filter 1\n"
 717 "dr     dering                                  deringing filter\n"
 718 "al     autolevels                              automatic brightness / contrast\n"
 719 "                       f        fullyrange     stretch luminance to (0..255)\n"
 720 "lb     linblenddeint                           linear blend deinterlacer\n"
 721 "li     linipoldeint                            linear interpolating deinterlace\n"
 722 "ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
 723 "md     mediandeint                             median deinterlacer\n"
 724 "fd     ffmpegdeint                             ffmpeg deinterlacer\n"
 725 "l5     lowpass5                                FIR lowpass deinterlacer\n"
 726 "de     default                                 hb:a,vb:a,dr:a\n"
 727 "fa     fast                                    h1:a,v1:a,dr:a\n"
 728 "ac                                             ha:a:128:7,va:a,dr:a\n"
 729 "tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
 730 "                     1. <= 2. <= 3.            larger -> stronger filtering\n"
 731 "fq     forceQuant      <quantizer>             force quantizer\n"
 732 "Usage:\n"
 733 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
 734 "long form example:\n"
 735 "vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
 736 "short form example:\n"
 737 "vb:a/hb:a/lb                                   de,-vb\n"
 738 "more examples:\n"
 739 "tn:64:128:256\n"
 740 "\n"
 741 ;
 742
 743 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
 744 {
 745     char temp[GET_MODE_BUFFER_SIZE];
 746     char *p= temp;
 747     static const char filterDelimiters[] = ",/";
 748     static const char optionDelimiters[] = ":";
 749     struct PPMode *ppMode;
 750     char *filterToken;
 751
 752     ppMode= av_malloc(sizeof(PPMode));
 753
 754     ppMode->lumMode= 0;
 755     ppMode->chromMode= 0;
 756     ppMode->maxTmpNoise[0]= 700;
 757     ppMode->maxTmpNoise[1]= 1500;
 758     ppMode->maxTmpNoise[2]= 3000;
 759     ppMode->maxAllowedY= 234;
 760     ppMode->minAllowedY= 16;
 761     ppMode->baseDcDiff= 256/8;
 762     ppMode->flatnessThreshold= 56-16-1;
 763     ppMode->maxClippedThreshold= 0.01;
 764     ppMode->error=0;
 765
 766     av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE);
 767
 768     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
 769
 770     for(;;){
 771         char *filterName;
 772         int q= 1000000; //PP_QUALITY_MAX;
 773         int chrom=-1;
 774         int luma=-1;
 775         char *option;
 776         char *options[OPTIONS_ARRAY_SIZE];
 777         int i;
 778         int filterNameOk=0;
 779         int numOfUnknownOptions=0;
 780         int enable=1; //does the user want us to enabled or disabled the filter
 781
 782         filterToken= strtok(p, filterDelimiters);
 783         if(filterToken == NULL) break;
 784         p+= strlen(filterToken) + 1; // p points to next filterToken
 785         filterName= strtok(filterToken, optionDelimiters);
 786         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
 787
 788         if(*filterName == '-'){
 789             enable=0;
 790             filterName++;
 791         }
 792
 793         for(;;){ //for all options
 794             option= strtok(NULL, optionDelimiters);
 795             if(option == NULL) break;
 796
 797             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
 798             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
 799             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
 800             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
 801             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
 802             else{
 803                 options[numOfUnknownOptions] = option;
 804                 numOfUnknownOptions++;
 805             }
 806             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
 807         }
 808         options[numOfUnknownOptions] = NULL;
 809
 810         /* replace stuff from the replace Table */
 811         for(i=0; replaceTable[2*i]!=NULL; i++){
 812             if(!strcmp(replaceTable[2*i], filterName)){
 813                 int newlen= strlen(replaceTable[2*i + 1]);
 814                 int plen;
 815                 int spaceLeft;
 816
 817                 if(p==NULL) p= temp, *p=0;      //last filter
 818                 else p--, *p=',';               //not last filter
 819
 820                 plen= strlen(p);
 821                 spaceLeft= p - temp + plen;
 822                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE){
 823                     ppMode->error++;
 824                     break;
 825                 }
 826                 memmove(p + newlen, p, plen+1);
 827                 memcpy(p, replaceTable[2*i + 1], newlen);
 828                 filterNameOk=1;
 829             }
 830         }
 831
 832         for(i=0; filters[i].shortName!=NULL; i++){
 833             if(   !strcmp(filters[i].longName, filterName)
 834                || !strcmp(filters[i].shortName, filterName)){
 835                 ppMode->lumMode &= ~filters[i].mask;
 836                 ppMode->chromMode &= ~filters[i].mask;
 837
 838                 filterNameOk=1;
 839                 if(!enable) break; // user wants to disable it
 840
 841                 if(q >= filters[i].minLumQuality && luma)
 842                     ppMode->lumMode|= filters[i].mask;
 843                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
 844                     if(q >= filters[i].minChromQuality)
 845                             ppMode->chromMode|= filters[i].mask;
 846
 847                 if(filters[i].mask == LEVEL_FIX){
 848                     int o;
 849                     ppMode->minAllowedY= 16;
 850                     ppMode->maxAllowedY= 234;
 851                     for(o=0; options[o]!=NULL; o++){
 852                         if(  !strcmp(options[o],"fullyrange")
 853                            ||!strcmp(options[o],"f")){
 854                             ppMode->minAllowedY= 0;
 855                             ppMode->maxAllowedY= 255;
 856                             numOfUnknownOptions--;
 857                         }
 858                     }
 859                 }
 860                 else if(filters[i].mask == TEMP_NOISE_FILTER)
 861                 {
 862                     int o;
 863                     int numOfNoises=0;
 864
 865                     for(o=0; options[o]!=NULL; o++){
 866                         char *tail;
 867                         ppMode->maxTmpNoise[numOfNoises]=
 868                             strtol(options[o], &tail, 0);
 869                         if(tail!=options[o]){
 870                             numOfNoises++;
 871                             numOfUnknownOptions--;
 872                             if(numOfNoises >= 3) break;
 873                         }
 874                     }
 875                 }
 876                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
 877                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
 878                     int o;
 879
 880                     for(o=0; options[o]!=NULL && o<2; o++){
 881                         char *tail;
 882                         int val= strtol(options[o], &tail, 0);
 883                         if(tail==options[o]) break;
 884
 885                         numOfUnknownOptions--;
 886                         if(o==0) ppMode->baseDcDiff= val;
 887                         else ppMode->flatnessThreshold= val;
 888                     }
 889                 }
 890                 else if(filters[i].mask == FORCE_QUANT){
 891                     int o;
 892                     ppMode->forcedQuant= 15;
 893
 894                     for(o=0; options[o]!=NULL && o<1; o++){
 895                         char *tail;
 896                         int val= strtol(options[o], &tail, 0);
 897                         if(tail==options[o]) break;
 898
 899                         numOfUnknownOptions--;
 900                         ppMode->forcedQuant= val;
 901                     }
 902                 }
 903             }
 904         }
 905         if(!filterNameOk) ppMode->error++;
 906         ppMode->error += numOfUnknownOptions;
 907     }
 908
 909     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
 910     if(ppMode->error){
 911         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
 912         av_free(ppMode);
 913         return NULL;
 914     }
 915     return ppMode;
 916 }
 917
 918 void pp_free_mode(pp_mode *mode){
 919     av_free(mode);
 920 }
 921
 922 static void reallocAlign(void **p, int alignment, int size){
 923     av_free(*p);
 924     *p= av_mallocz(size);
 925 }
 926
 927 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
 928     int mbWidth = (width+15)>>4;
 929     int mbHeight= (height+15)>>4;
 930     int i;
 931
 932     c->stride= stride;
 933     c->qpStride= qpStride;
 934
 935     reallocAlign((void **)&c->tempDst, 8, stride*24);
 936     reallocAlign((void **)&c->tempSrc, 8, stride*24);
 937     reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
 938     reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
 939     for(i=0; i<256; i++)
 940             c->yHistogram[i]= width*height/64*15/256;
 941
 942     for(i=0; i<3; i++){
 943         //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
 944         reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
 945         reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
 946     }
 947
 948     reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
 949     reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
 950     reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
 951     reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
 952 }
 953
 954 static const char * context_to_name(void * ptr) {
 955     return "postproc";
 956 }
 957
/* Class descriptor stored in PPContext.av_class by pp_get_context().
 * Initialized with the name "Postproc", context_to_name() as the name
 * callback and NULL as third member (presumably the option table - confirm
 * against the AVClass declaration). */
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
 959
 960 pp_context *pp_get_context(int width, int height, int cpuCaps){
 961     PPContext *c= av_malloc(sizeof(PPContext));
 962     int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
 963     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
 964
 965     memset(c, 0, sizeof(PPContext));
 966     c->av_class = &av_codec_context_class;
 967     c->cpuCaps= cpuCaps;
 968     if(cpuCaps&PP_FORMAT){
 969         c->hChromaSubSample= cpuCaps&0x3;
 970         c->vChromaSubSample= (cpuCaps>>4)&0x3;
 971     }else{
 972         c->hChromaSubSample= 1;
 973         c->vChromaSubSample= 1;
 974     }
 975
 976     reallocBuffers(c, width, height, stride, qpStride);
 977
 978     c->frameNum=-1;
 979
 980     return c;
 981 }
 982
 983 void pp_free_context(void *vc){
 984     PPContext *c = (PPContext*)vc;
 985     int i;
 986
 987     for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
 988     for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
 989
 990     av_free(c->tempBlocks);
 991     av_free(c->yHistogram);
 992     av_free(c->tempDst);
 993     av_free(c->tempSrc);
 994     av_free(c->deintTemp);
 995     av_free(c->stdQPTable);
 996     av_free(c->nonBQPTable);
 997     av_free(c->forcedQPTable);
 998
 999     memset(c, 0, sizeof(PPContext));
1000
1001     av_free(c);
1002 }
1003
1004 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
1005                      uint8_t * dst[3], const int dstStride[3],
1006                      int width, int height,
1007                      const QP_STORE_T *QP_store,  int QPStride,
1008                      pp_mode *vm,  void *vc, int pict_type)
1009 {
1010     int mbWidth = (width+15)>>4;
1011     int mbHeight= (height+15)>>4;
1012     PPMode *mode = (PPMode*)vm;
1013     PPContext *c = (PPContext*)vc;
1014     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1015     int absQPStride = FFABS(QPStride);
1016
1017     // c->stride and c->QPStride are always positive
1018     if(c->stride < minStride || c->qpStride < absQPStride)
1019         reallocBuffers(c, width, height,
1020                        FFMAX(minStride, c->stride),
1021                        FFMAX(c->qpStride, absQPStride));
1022
1023     if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1024         int i;
1025         QP_store= c->forcedQPTable;
1026         absQPStride = QPStride = 0;
1027         if(mode->lumMode & FORCE_QUANT)
1028             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1029         else
1030             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1031     }
1032
1033     if(pict_type & PP_PICT_TYPE_QP2){
1034         int i;
1035         const int count= mbHeight * absQPStride;
1036         for(i=0; i<(count>>2); i++){
1037             ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1038         }
1039         for(i<<=2; i<count; i++){
1040             c->stdQPTable[i] = QP_store[i]>>1;
1041         }
1042         QP_store= c->stdQPTable;
1043         QPStride= absQPStride;
1044     }
1045
1046     if(0){
1047         int x,y;
1048         for(y=0; y<mbHeight; y++){
1049             for(x=0; x<mbWidth; x++){
1050                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1051             }
1052             av_log(c, AV_LOG_INFO, "\n");
1053         }
1054         av_log(c, AV_LOG_INFO, "\n");
1055     }
1056
1057     if((pict_type&7)!=3){
1058         if (QPStride >= 0){
1059             int i;
1060             const int count= mbHeight * QPStride;
1061             for(i=0; i<(count>>2); i++){
1062                 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1063             }
1064             for(i<<=2; i<count; i++){
1065                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1066             }
1067         } else {
1068             int i,j;
1069             for(i=0; i<mbHeight; i++) {
1070                 for(j=0; j<absQPStride; j++) {
1071                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1072                 }
1073             }
1074         }
1075     }
1076
1077     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1078            mode->lumMode, mode->chromMode);
1079
1080     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1081                 width, height, QP_store, QPStride, 0, mode, c);
1082
1083     width  = (width )>>c->hChromaSubSample;
1084     height = (height)>>c->vChromaSubSample;
1085
1086     if(mode->chromMode){
1087         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1088                     width, height, QP_store, QPStride, 1, mode, c);
1089         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1090                     width, height, QP_store, QPStride, 2, mode, c);
1091     }
1092     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1093         linecpy(dst[1], src[1], height, srcStride[1]);
1094         linecpy(dst[2], src[2], height, srcStride[2]);
1095     }else{
1096         int y;
1097         for(y=0; y<height; y++){
1098             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1099             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1100         }
1101     }
1102 }