libpostproc/postprocess.c

   1 /*
   2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
   3  *
   4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file postprocess.c
  25  * postprocessing.
  26  */
  27
  28 /*
  29                         C       MMX     MMX2    3DNow   AltiVec
  30 isVertDC                Ec      Ec                      Ec
  31 isVertMinMaxOk          Ec      Ec                      Ec
  32 doVertLowPass           E               e       e       Ec
  33 doVertDefFilter         Ec      Ec      e       e       Ec
  34 isHorizDC               Ec      Ec                      Ec
  35 isHorizMinMaxOk         a       E                       Ec
  36 doHorizLowPass          E               e       e       Ec
  37 doHorizDefFilter        Ec      Ec      e       e       Ec
  38 do_a_deblock            Ec      E       Ec      E
  39 deRing                  E               e       e*      Ecp
  40 Vertical RKAlgo1        E               a       a
  41 Horizontal RKAlgo1                      a       a
  42 Vertical X1#            a               E       E
  43 Horizontal X1#          a               E       E
  44 LinIpolDeinterlace      e               E       E*
  45 CubicIpolDeinterlace    a               e       e*
  46 LinBlendDeinterlace     e               E       E*
  47 MedianDeinterlace#      E       Ec      Ec
  48 TempDeNoiser#           E               e       e       Ec
  49
  50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
  51 # more or less selfinvented filters so the exactness is not too meaningful
  52 E = Exact implementation
  53 e = almost exact implementation (slightly different rounding,...)
  54 a = alternative / approximate impl
  55 c = checked against the other implementations (-vo md5)
  56 p = partially optimized, still some work to do
  57 */
  58
  59 /*
  60 TODO:
  61 reduce the time wasted on the mem transfer
  62 unroll stuff if instructions depend too much on the prior one
  63 move YScale thing to the end instead of fixing QP
  64 write a faster and higher quality deblocking filter :)
make the mainloop more flexible (variable number of blocks at once)
        (the if/else stuff per block is slowing things down)
  67 compare the quality & speed of all filters
  68 split this huge file
  69 optimize c versions
  70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
  71 ...
  72 */
  73
  74 //Changelog: use the Subversion log
  75
  76 #include "config.h"
  77 #include "libavutil/avutil.h"
  78 #include <inttypes.h>
  79 #include <stdio.h>
  80 #include <stdlib.h>
  81 #include <string.h>
  82 #ifdef HAVE_MALLOC_H
  83 #include <malloc.h>
  84 #endif
  85 //#undef HAVE_MMX2
  86 //#define HAVE_3DNOW
  87 //#undef HAVE_MMX
  88 //#undef ARCH_X86
  89 //#define DEBUG_BRIGHTNESS
  90 #include "postprocess.h"
  91 #include "postprocess_internal.h"
  92
  93 unsigned postproc_version(void)
  94 {
  95     return LIBPOSTPROC_VERSION_INT;
  96 }
  97
  98 #ifdef HAVE_ALTIVEC_H
  99 #include <altivec.h>
 100 #endif
 101
// Size in bytes of the scratch buffer into which the mode string is copied for parsing.
#define GET_MODE_BUFFER_SIZE 500
// Capacity of the per-filter array that collects options the parser does not recognize.
#define OPTIONS_ARRAY_SIZE 10
// Number of rows/columns the C block filters in this file iterate over.
#define BLOCK_SIZE 8
// NOTE(review): not referenced in the visible part of this file; presumably used by the included templates - confirm.
#define TEMP_STRIDE 8
 106 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
 107
// 64-bit constants declared only for x86 builds.
// wNN repeats the value 0xNN in each of the four 16-bit words,
// bNN repeats the value 0xNN in each of the eight bytes.
// NOTE(review): none of them is referenced in the visible part of this file;
// presumably they are operands of the inline assembly in the included templates - confirm.
#if defined(ARCH_X86)
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif
 118
// Integer constant with the value 20, declared for all architectures.
// NOTE(review): not referenced in the visible part of this file; presumably the
// threshold used by the deringing filter in the included templates - confirm.
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
 120
 121
/**
 * Table of the filters that can be named in a mode string.
 *
 * Each entry starts with the short name and the long name, either of which
 * selects the filter (the parser compares both with strcmp), and ends with
 * the mask bit that is set or cleared in lumMode / chromMode.
 * The three integers in between fill the chromDefault, minLumQuality and
 * minChromQuality members read by the parser.
 * NOTE(review): their order is fixed by struct PPFilter, which is declared
 * outside this file - confirm against postprocess_internal.h.
 *
 * The list ends with an entry whose shortName is NULL.
 */
static struct PPFilter filters[]=
{
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering",                1, 5, 6, DERING},
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
    {NULL, NULL,0,0,0,0} //End Marker
};
 144
/**
 * Alias table for the mode-string parser.
 *
 * Entries come in pairs: the string at an even index is an alias name, the
 * string at the following odd index is the filter list that is spliced into
 * the mode string in place of that alias.
 * A single NULL at an even index terminates the table.
 */
static const char *replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
 154
 155
 156 #if defined(ARCH_X86)
/**
 * Issue an x86 "prefetchnta" instruction for the address p.
 *
 * @param p address handed to the instruction in a register
 */
static inline void prefetchnta(void *p)
{
    asm volatile(   "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}
 163
/**
 * Issue an x86 "prefetcht0" instruction for the address p.
 *
 * @param p address handed to the instruction in a register
 */
static inline void prefetcht0(void *p)
{
    asm volatile(   "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}
 170
/**
 * Issue an x86 "prefetcht1" instruction for the address p.
 *
 * @param p address handed to the instruction in a register
 */
static inline void prefetcht1(void *p)
{
    asm volatile(   "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}
 177
/**
 * Issue an x86 "prefetcht2" instruction for the address p.
 *
 * @param p address handed to the instruction in a register
 */
static inline void prefetcht2(void *p)
{
    asm volatile(   "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
 184 #endif
 185
 186 /* The horizontal functions exist only in C because the MMX
 187  * code is faster with vertical filters and transposing. */
 188
 189 /**
 190  * Check if the given 8x8 Block is mostly "flat"
 191  */
 192 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
 193 {
 194     int numEq= 0;
 195     int y;
 196     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 197     const int dcThreshold= dcOffset*2 + 1;
 198
 199     for(y=0; y<BLOCK_SIZE; y++){
 200         if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
 201         if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
 202         if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
 203         if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
 204         if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
 205         if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
 206         if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
 207         src+= stride;
 208     }
 209     return numEq > c->ppMode.flatnessThreshold;
 210 }
 211
 212 /**
 213  * Check if the middle 8x8 Block in the given 8x16 block is flat
 214  */
 215 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
 216 {
 217     int numEq= 0;
 218     int y;
 219     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 220     const int dcThreshold= dcOffset*2 + 1;
 221
 222     src+= stride*4; // src points to begin of the 8x8 Block
 223     for(y=0; y<BLOCK_SIZE-1; y++){
 224         if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
 225         if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
 226         if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
 227         if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
 228         if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
 229         if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
 230         if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
 231         if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
 232         src+= stride;
 233     }
 234     return numEq > c->ppMode.flatnessThreshold;
 235 }
 236
 237 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
 238 {
 239     int i;
 240 #if 1
 241     for(i=0; i<2; i++){
 242         if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
 243         src += stride;
 244         if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
 245         src += stride;
 246         if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
 247         src += stride;
 248         if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
 249         src += stride;
 250     }
 251 #else
 252     for(i=0; i<8; i++){
 253         if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
 254         src += stride;
 255     }
 256 #endif
 257     return 1;
 258 }
 259
 260 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
 261 {
 262 #if 1
 263 #if 1
 264     int x;
 265     src+= stride*4;
 266     for(x=0; x<BLOCK_SIZE; x+=4){
 267         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
 268         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
 269         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
 270         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
 271     }
 272 #else
 273     int x;
 274     src+= stride*3;
 275     for(x=0; x<BLOCK_SIZE; x++){
 276         if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
 277     }
 278 #endif
 279     return 1;
 280 #else
 281     int x;
 282     src+= stride*4;
 283     for(x=0; x<BLOCK_SIZE; x++){
 284         int min=255;
 285         int max=0;
 286         int y;
 287         for(y=0; y<8; y++){
 288             int v= src[x + y*stride];
 289             if(v>max) max=v;
 290             if(v<min) min=v;
 291         }
 292         if(max-min > 2*QP) return 0;
 293     }
 294     return 1;
 295 #endif
 296 }
 297
 298 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
 299 {
 300     if( isHorizDC_C(src, stride, c) ){
 301         if( isHorizMinMaxOk_C(src, stride, c->QP) )
 302             return 1;
 303         else
 304             return 0;
 305     }else{
 306         return 2;
 307     }
 308 }
 309
 310 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
 311 {
 312     if( isVertDC_C(src, stride, c) ){
 313         if( isVertMinMaxOk_C(src, stride, c->QP) )
 314             return 1;
 315         else
 316             return 0;
 317     }else{
 318         return 2;
 319     }
 320 }
 321
 322 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
 323 {
 324     int y;
 325     for(y=0; y<BLOCK_SIZE; y++){
 326         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
 327
 328         if(FFABS(middleEnergy) < 8*c->QP){
 329             const int q=(dst[3] - dst[4])/2;
 330             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
 331             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
 332
 333             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 334             d= FFMAX(d, 0);
 335
 336             d= (5*d + 32) >> 6;
 337             d*= FFSIGN(-middleEnergy);
 338
 339             if(q>0)
 340             {
 341                 d= d<0 ? 0 : d;
 342                 d= d>q ? q : d;
 343             }
 344             else
 345             {
 346                 d= d>0 ? 0 : d;
 347                 d= d<q ? q : d;
 348             }
 349
 350             dst[3]-= d;
 351             dst[4]+= d;
 352         }
 353         dst+= stride;
 354     }
 355 }
 356
 357 /**
 358  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 359  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 360  */
 361 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
 362 {
 363     int y;
 364     for(y=0; y<BLOCK_SIZE; y++){
 365         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
 366         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
 367
 368         int sums[10];
 369         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
 370         sums[1] = sums[0] - first  + dst[3];
 371         sums[2] = sums[1] - first  + dst[4];
 372         sums[3] = sums[2] - first  + dst[5];
 373         sums[4] = sums[3] - first  + dst[6];
 374         sums[5] = sums[4] - dst[0] + dst[7];
 375         sums[6] = sums[5] - dst[1] + last;
 376         sums[7] = sums[6] - dst[2] + last;
 377         sums[8] = sums[7] - dst[3] + last;
 378         sums[9] = sums[8] - dst[4] + last;
 379
 380         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
 381         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
 382         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
 383         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
 384         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
 385         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
 386         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
 387         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
 388
 389         dst+= stride;
 390     }
 391 }
 392
 393 /**
 394  * Experimental Filter 1 (Horizontal)
 395  * will not damage linear gradients
 396  * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
 397  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
 398  * MMX2 version does correct clipping C version does not
 399  * not identical with the vertical one
 400  */
 401 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
 402 {
 403     int y;
 404     static uint64_t *lut= NULL;
 405     if(lut==NULL)
 406     {
 407         int i;
 408         lut = av_malloc(256*8);
 409         for(i=0; i<256; i++)
 410         {
 411             int v= i < 128 ? 2*i : 2*(i-256);
 412 /*
 413 //Simulate 112242211 9-Tap filter
 414             uint64_t a= (v/16)  & 0xFF;
 415             uint64_t b= (v/8)   & 0xFF;
 416             uint64_t c= (v/4)   & 0xFF;
 417             uint64_t d= (3*v/8) & 0xFF;
 418 */
 419 //Simulate piecewise linear interpolation
 420             uint64_t a= (v/16)   & 0xFF;
 421             uint64_t b= (v*3/16) & 0xFF;
 422             uint64_t c= (v*5/16) & 0xFF;
 423             uint64_t d= (7*v/16) & 0xFF;
 424             uint64_t A= (0x100 - a)&0xFF;
 425             uint64_t B= (0x100 - b)&0xFF;
 426             uint64_t C= (0x100 - c)&0xFF;
 427             uint64_t D= (0x100 - c)&0xFF;
 428
 429             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
 430                        (D<<24) | (C<<16) | (B<<8)  | (A);
 431             //lut[i] = (v<<32) | (v<<24);
 432         }
 433     }
 434
 435     for(y=0; y<BLOCK_SIZE; y++){
 436         int a= src[1] - src[2];
 437         int b= src[3] - src[4];
 438         int c= src[5] - src[6];
 439
 440         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
 441
 442         if(d < QP){
 443             int v = d * FFSIGN(-b);
 444
 445             src[1] +=v/8;
 446             src[2] +=v/4;
 447             src[3] +=3*v/8;
 448             src[4] -=3*v/8;
 449             src[5] -=v/4;
 450             src[6] -=v/8;
 451         }
 452         src+=stride;
 453     }
 454 }
 455
 456 /**
 457  * accurate deblock filter
 458  */
 459 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
 460     int y;
 461     const int QP= c->QP;
 462     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 463     const int dcThreshold= dcOffset*2 + 1;
 464 //START_TIMER
 465     src+= step*4; // src points to begin of the 8x8 Block
 466     for(y=0; y<8; y++){
 467         int numEq= 0;
 468
 469         if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
 470         if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
 471         if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
 472         if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
 473         if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
 474         if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
 475         if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
 476         if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
 477         if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
 478         if(numEq > c->ppMode.flatnessThreshold){
 479             int min, max, x;
 480
 481             if(src[0] > src[step]){
 482                 max= src[0];
 483                 min= src[step];
 484             }else{
 485                 max= src[step];
 486                 min= src[0];
 487             }
 488             for(x=2; x<8; x+=2){
 489                 if(src[x*step] > src[(x+1)*step]){
 490                         if(src[x    *step] > max) max= src[ x   *step];
 491                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
 492                 }else{
 493                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
 494                         if(src[ x   *step] < min) min= src[ x   *step];
 495                 }
 496             }
 497             if(max-min < 2*QP){
 498                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
 499                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
 500
 501                 int sums[10];
 502                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
 503                 sums[1] = sums[0] - first       + src[3*step];
 504                 sums[2] = sums[1] - first       + src[4*step];
 505                 sums[3] = sums[2] - first       + src[5*step];
 506                 sums[4] = sums[3] - first       + src[6*step];
 507                 sums[5] = sums[4] - src[0*step] + src[7*step];
 508                 sums[6] = sums[5] - src[1*step] + last;
 509                 sums[7] = sums[6] - src[2*step] + last;
 510                 sums[8] = sums[7] - src[3*step] + last;
 511                 sums[9] = sums[8] - src[4*step] + last;
 512
 513                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
 514                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
 515                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
 516                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
 517                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
 518                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
 519                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
 520                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
 521             }
 522         }else{
 523             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
 524
 525             if(FFABS(middleEnergy) < 8*QP){
 526                 const int q=(src[3*step] - src[4*step])/2;
 527                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
 528                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
 529
 530                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 531                 d= FFMAX(d, 0);
 532
 533                 d= (5*d + 32) >> 6;
 534                 d*= FFSIGN(-middleEnergy);
 535
 536                 if(q>0){
 537                     d= d<0 ? 0 : d;
 538                     d= d>q ? q : d;
 539                 }else{
 540                     d= d>0 ? 0 : d;
 541                     d= d<q ? q : d;
 542                 }
 543
 544                 src[3*step]-= d;
 545                 src[4*step]+= d;
 546             }
 547         }
 548
 549         src += stride;
 550     }
 551 /*if(step==16){
 552     STOP_TIMER("step16")
 553 }else{
 554     STOP_TIMER("stepX")
 555 }*/
 556 }
 557
// Note: there are C, MMX, MMX2 and 3DNOW versions; there is no combined 3DNOW+MMX2 one.
// This section decides which versions get compiled (COMPILE_xyz) and then
// includes postprocess_template.c once per selected version, each time with
// the HAVE_xyz macros set for that version and RENAME() appending its suffix.

// Plain C version: needed when MMX is not available at build time or when the
// CPU is detected at run time.
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC

#if defined(ARCH_X86)

// Without run-time detection exactly one of MMX / MMX2 / 3DNOW is selected
// (MMX2 wins over 3DNOW, both win over plain MMX); with it, all three are.
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif /* defined(ARCH_X86) */

// From here on the HAVE_xyz macros no longer describe the build configuration:
// they are cleared and then redefined per included version.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC

#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif

// Note: past this point the HAVE_xyz macros hold whatever the last template
// block above defined, not the original build configuration.
 635
/**
 * Dispatch one postprocessing call to the implementation matching the CPU.
 *
 * The mode pointed to by vm is copied into the context before dispatching.
 * With RUNTIME_CPUDETECT the implementation is chosen from c->cpuCaps on each
 * call; otherwise the choice is fixed at compile time by the HAVE_xyz macros.
 * All arguments except vm are forwarded unchanged to the chosen postProcess_*
 * function, with vc passed as a PPContext pointer.
 *
 * @param vm mode to use, treated as a PPMode
 * @param vc context, treated as a PPContext
 */
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
{
    PPContext *c= (PPContext *)vc;
    PPMode *ppMode= (PPMode *)vm;
    c->ppMode= *ppMode; //FIXME

    // Using ifs here as they are faster than function pointers although the
    // difference would not be measurable here but it is much better because
    // someone might exchange the CPU without restarting MPlayer ;)
#ifdef RUNTIME_CPUDETECT
#if defined(ARCH_X86)
    // ordered per speed fastest first
    if(c->cpuCaps & PP_CPU_CAPS_MMX2)
        postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
        postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else if(c->cpuCaps & PP_CPU_CAPS_MMX)
        postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else
        postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else
#ifdef HAVE_ALTIVEC
    if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
    else
#endif
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif
#else //RUNTIME_CPUDETECT
    // exactly one implementation is called, selected by the preprocessor
#ifdef HAVE_MMX2
            postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_3DNOW)
            postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_MMX)
            postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_ALTIVEC)
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#else
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#endif
#endif //!RUNTIME_CPUDETECT
}
 679
 680 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 681 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
 682
 683 /* -pp Command line Help
 684 */
 685 #if LIBPOSTPROC_VERSION_INT < (52<<16)
 686 const char *const pp_help=
 687 #else
 688 const char pp_help[] =
 689 #endif
 690 "Available postprocessing filters:\n"
 691 "Filters                        Options\n"
 692 "short  long name       short   long option     Description\n"
 693 "*      *               a       autoq           CPU power dependent enabler\n"
 694 "                       c       chrom           chrominance filtering enabled\n"
 695 "                       y       nochrom         chrominance filtering disabled\n"
 696 "                       n       noluma          luma filtering disabled\n"
 697 "hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
 698 "       1. difference factor: default=32, higher -> more deblocking\n"
 699 "       2. flatness threshold: default=39, lower -> more deblocking\n"
 700 "                       the h & v deblocking filters share these\n"
 701 "                       so you can't set different thresholds for h / v\n"
 702 "vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
 703 "ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
 704 "va     vadeblock       (2 threshold)           vertical deblocking filter\n"
 705 "h1     x1hdeblock                              experimental h deblock filter 1\n"
 706 "v1     x1vdeblock                              experimental v deblock filter 1\n"
 707 "dr     dering                                  deringing filter\n"
 708 "al     autolevels                              automatic brightness / contrast\n"
 709 "                       f        fullyrange     stretch luminance to (0..255)\n"
 710 "lb     linblenddeint                           linear blend deinterlacer\n"
 711 "li     linipoldeint                            linear interpolating deinterlace\n"
 712 "ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
 713 "md     mediandeint                             median deinterlacer\n"
 714 "fd     ffmpegdeint                             ffmpeg deinterlacer\n"
 715 "l5     lowpass5                                FIR lowpass deinterlacer\n"
 716 "de     default                                 hb:a,vb:a,dr:a\n"
 717 "fa     fast                                    h1:a,v1:a,dr:a\n"
 718 "ac                                             ha:a:128:7,va:a,dr:a\n"
 719 "tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
 720 "                     1. <= 2. <= 3.            larger -> stronger filtering\n"
 721 "fq     forceQuant      <quantizer>             force quantizer\n"
 722 "Usage:\n"
 723 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
 724 "long form example:\n"
 725 "vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
 726 "short form example:\n"
 727 "vb:a/hb:a/lb                                   de,-vb\n"
 728 "more examples:\n"
 729 "tn:64:128:256\n"
 730 "\n"
 731 ;
 732
 733 pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
 734 {
 735     char temp[GET_MODE_BUFFER_SIZE];
 736     char *p= temp;
 737     static const char filterDelimiters[] = ",/";
 738     static const char optionDelimiters[] = ":";
 739     struct PPMode *ppMode;
 740     char *filterToken;
 741
 742     ppMode= av_malloc(sizeof(PPMode));
 743
 744     ppMode->lumMode= 0;
 745     ppMode->chromMode= 0;
 746     ppMode->maxTmpNoise[0]= 700;
 747     ppMode->maxTmpNoise[1]= 1500;
 748     ppMode->maxTmpNoise[2]= 3000;
 749     ppMode->maxAllowedY= 234;
 750     ppMode->minAllowedY= 16;
 751     ppMode->baseDcDiff= 256/8;
 752     ppMode->flatnessThreshold= 56-16-1;
 753     ppMode->maxClippedThreshold= 0.01;
 754     ppMode->error=0;
 755
 756     strncpy(temp, name, GET_MODE_BUFFER_SIZE);
 757
 758     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
 759
 760     for(;;){
 761         char *filterName;
 762         int q= 1000000; //PP_QUALITY_MAX;
 763         int chrom=-1;
 764         int luma=-1;
 765         char *option;
 766         char *options[OPTIONS_ARRAY_SIZE];
 767         int i;
 768         int filterNameOk=0;
 769         int numOfUnknownOptions=0;
 770         int enable=1; //does the user want us to enabled or disabled the filter
 771
 772         filterToken= strtok(p, filterDelimiters);
 773         if(filterToken == NULL) break;
 774         p+= strlen(filterToken) + 1; // p points to next filterToken
 775         filterName= strtok(filterToken, optionDelimiters);
 776         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
 777
 778         if(*filterName == '-'){
 779             enable=0;
 780             filterName++;
 781         }
 782
 783         for(;;){ //for all options
 784             option= strtok(NULL, optionDelimiters);
 785             if(option == NULL) break;
 786
 787             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
 788             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
 789             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
 790             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
 791             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
 792             else{
 793                 options[numOfUnknownOptions] = option;
 794                 numOfUnknownOptions++;
 795             }
 796             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
 797         }
 798         options[numOfUnknownOptions] = NULL;
 799
 800         /* replace stuff from the replace Table */
 801         for(i=0; replaceTable[2*i]!=NULL; i++){
 802             if(!strcmp(replaceTable[2*i], filterName)){
 803                 int newlen= strlen(replaceTable[2*i + 1]);
 804                 int plen;
 805                 int spaceLeft;
 806
 807                 if(p==NULL) p= temp, *p=0;      //last filter
 808                 else p--, *p=',';               //not last filter
 809
 810                 plen= strlen(p);
 811                 spaceLeft= p - temp + plen;
 812                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE){
 813                     ppMode->error++;
 814                     break;
 815                 }
 816                 memmove(p + newlen, p, plen+1);
 817                 memcpy(p, replaceTable[2*i + 1], newlen);
 818                 filterNameOk=1;
 819             }
 820         }
 821
 822         for(i=0; filters[i].shortName!=NULL; i++){
 823             if(   !strcmp(filters[i].longName, filterName)
 824                || !strcmp(filters[i].shortName, filterName)){
 825                 ppMode->lumMode &= ~filters[i].mask;
 826                 ppMode->chromMode &= ~filters[i].mask;
 827
 828                 filterNameOk=1;
 829                 if(!enable) break; // user wants to disable it
 830
 831                 if(q >= filters[i].minLumQuality && luma)
 832                     ppMode->lumMode|= filters[i].mask;
 833                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
 834                     if(q >= filters[i].minChromQuality)
 835                             ppMode->chromMode|= filters[i].mask;
 836
 837                 if(filters[i].mask == LEVEL_FIX){
 838                     int o;
 839                     ppMode->minAllowedY= 16;
 840                     ppMode->maxAllowedY= 234;
 841                     for(o=0; options[o]!=NULL; o++){
 842                         if(  !strcmp(options[o],"fullyrange")
 843                            ||!strcmp(options[o],"f")){
 844                             ppMode->minAllowedY= 0;
 845                             ppMode->maxAllowedY= 255;
 846                             numOfUnknownOptions--;
 847                         }
 848                     }
 849                 }
 850                 else if(filters[i].mask == TEMP_NOISE_FILTER)
 851                 {
 852                     int o;
 853                     int numOfNoises=0;
 854
 855                     for(o=0; options[o]!=NULL; o++){
 856                         char *tail;
 857                         ppMode->maxTmpNoise[numOfNoises]=
 858                             strtol(options[o], &tail, 0);
 859                         if(tail!=options[o]){
 860                             numOfNoises++;
 861                             numOfUnknownOptions--;
 862                             if(numOfNoises >= 3) break;
 863                         }
 864                     }
 865                 }
 866                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
 867                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
 868                     int o;
 869
 870                     for(o=0; options[o]!=NULL && o<2; o++){
 871                         char *tail;
 872                         int val= strtol(options[o], &tail, 0);
 873                         if(tail==options[o]) break;
 874
 875                         numOfUnknownOptions--;
 876                         if(o==0) ppMode->baseDcDiff= val;
 877                         else ppMode->flatnessThreshold= val;
 878                     }
 879                 }
 880                 else if(filters[i].mask == FORCE_QUANT){
 881                     int o;
 882                     ppMode->forcedQuant= 15;
 883
 884                     for(o=0; options[o]!=NULL && o<1; o++){
 885                         char *tail;
 886                         int val= strtol(options[o], &tail, 0);
 887                         if(tail==options[o]) break;
 888
 889                         numOfUnknownOptions--;
 890                         ppMode->forcedQuant= val;
 891                     }
 892                 }
 893             }
 894         }
 895         if(!filterNameOk) ppMode->error++;
 896         ppMode->error += numOfUnknownOptions;
 897     }
 898
 899     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
 900     if(ppMode->error){
 901         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
 902         av_free(ppMode);
 903         return NULL;
 904     }
 905     return ppMode;
 906 }
 907
 908 void pp_free_mode(pp_mode_t *mode){
 909     av_free(mode);
 910 }
 911
 912 static void reallocAlign(void **p, int alignment, int size){
 913     av_free(*p);
 914     *p= av_mallocz(size);
 915 }
 916
 917 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
 918     int mbWidth = (width+15)>>4;
 919     int mbHeight= (height+15)>>4;
 920     int i;
 921
 922     c->stride= stride;
 923     c->qpStride= qpStride;
 924
 925     reallocAlign((void **)&c->tempDst, 8, stride*24);
 926     reallocAlign((void **)&c->tempSrc, 8, stride*24);
 927     reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
 928     reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
 929     for(i=0; i<256; i++)
 930             c->yHistogram[i]= width*height/64*15/256;
 931
 932     for(i=0; i<3; i++){
 933         //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
 934         reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
 935         reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
 936     }
 937
 938     reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
 939     reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
 940     reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
 941     reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
 942 }
 943
 944 static const char * context_to_name(void * ptr) {
 945     return "postproc";
 946 }
 947
 948 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
 949
 950 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
 951     PPContext *c= av_malloc(sizeof(PPContext));
 952     int stride= (width+15)&(~15);    //assumed / will realloc if needed
 953     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
 954
 955     memset(c, 0, sizeof(PPContext));
 956     c->av_class = &av_codec_context_class;
 957     c->cpuCaps= cpuCaps;
 958     if(cpuCaps&PP_FORMAT){
 959         c->hChromaSubSample= cpuCaps&0x3;
 960         c->vChromaSubSample= (cpuCaps>>4)&0x3;
 961     }else{
 962         c->hChromaSubSample= 1;
 963         c->vChromaSubSample= 1;
 964     }
 965
 966     reallocBuffers(c, width, height, stride, qpStride);
 967
 968     c->frameNum=-1;
 969
 970     return c;
 971 }
 972
 973 void pp_free_context(void *vc){
 974     PPContext *c = (PPContext*)vc;
 975     int i;
 976
 977     for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
 978     for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
 979
 980     av_free(c->tempBlocks);
 981     av_free(c->yHistogram);
 982     av_free(c->tempDst);
 983     av_free(c->tempSrc);
 984     av_free(c->deintTemp);
 985     av_free(c->stdQPTable);
 986     av_free(c->nonBQPTable);
 987     av_free(c->forcedQPTable);
 988
 989     memset(c, 0, sizeof(PPContext));
 990
 991     av_free(c);
 992 }
 993
 994 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
 995                      uint8_t * dst[3], const int dstStride[3],
 996                      int width, int height,
 997                      const QP_STORE_T *QP_store,  int QPStride,
 998                      pp_mode_t *vm,  void *vc, int pict_type)
 999 {
1000     int mbWidth = (width+15)>>4;
1001     int mbHeight= (height+15)>>4;
1002     PPMode *mode = (PPMode*)vm;
1003     PPContext *c = (PPContext*)vc;
1004     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1005     int absQPStride = FFABS(QPStride);
1006
1007     // c->stride and c->QPStride are always positive
1008     if(c->stride < minStride || c->qpStride < absQPStride)
1009         reallocBuffers(c, width, height,
1010                        FFMAX(minStride, c->stride),
1011                        FFMAX(c->qpStride, absQPStride));
1012
1013     if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1014         int i;
1015         QP_store= c->forcedQPTable;
1016         absQPStride = QPStride = 0;
1017         if(mode->lumMode & FORCE_QUANT)
1018             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1019         else
1020             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1021     }
1022
1023     if(pict_type & PP_PICT_TYPE_QP2){
1024         int i;
1025         const int count= mbHeight * absQPStride;
1026         for(i=0; i<(count>>2); i++){
1027             ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1028         }
1029         for(i<<=2; i<count; i++){
1030             c->stdQPTable[i] = QP_store[i]>>1;
1031         }
1032         QP_store= c->stdQPTable;
1033         QPStride= absQPStride;
1034     }
1035
1036     if(0){
1037         int x,y;
1038         for(y=0; y<mbHeight; y++){
1039             for(x=0; x<mbWidth; x++){
1040                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1041             }
1042             av_log(c, AV_LOG_INFO, "\n");
1043         }
1044         av_log(c, AV_LOG_INFO, "\n");
1045     }
1046
1047     if((pict_type&7)!=3){
1048         if (QPStride >= 0){
1049             int i;
1050             const int count= mbHeight * QPStride;
1051             for(i=0; i<(count>>2); i++){
1052                 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1053             }
1054             for(i<<=2; i<count; i++){
1055                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1056             }
1057         } else {
1058             int i,j;
1059             for(i=0; i<mbHeight; i++) {
1060                 for(j=0; j<absQPStride; j++) {
1061                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1062                 }
1063             }
1064         }
1065     }
1066
1067     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1068            mode->lumMode, mode->chromMode);
1069
1070     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1071                 width, height, QP_store, QPStride, 0, mode, c);
1072
1073     width  = (width )>>c->hChromaSubSample;
1074     height = (height)>>c->vChromaSubSample;
1075
1076     if(mode->chromMode){
1077         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1078                     width, height, QP_store, QPStride, 1, mode, c);
1079         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1080                     width, height, QP_store, QPStride, 2, mode, c);
1081     }
1082     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1083         linecpy(dst[1], src[1], height, srcStride[1]);
1084         linecpy(dst[2], src[2], height, srcStride[2]);
1085     }else{
1086         int y;
1087         for(y=0; y<height; y++){
1088             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1089             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1090         }
1091     }
1092 }
1093