interlaced dct decision cleanup

author Michael Niedermayer <michaelni@gmx.at>

Mon, 5 Jan 2004 22:57:07 +0000 (22:57 +0000)

committer Michael Niedermayer <michaelni@gmx.at>

Mon, 5 Jan 2004 22:57:07 +0000 (22:57 +0000)
author Michael Niedermayer <michaelni@gmx.at>
Mon, 5 Jan 2004 22:57:07 +0000 (22:57 +0000)
committer Michael Niedermayer <michaelni@gmx.at>
Mon, 5 Jan 2004 22:57:07 +0000 (22:57 +0000)
diff --git a/ffmpeg.c b/ffmpeg.c

index 5249264..c603ba1 100644 (file)
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -111,6 +111,7 @@ static int video_codec_id = CODEC_ID_NONE;
  static int same_quality = 0;
  static int b_frames = 0;
  static int mb_decision = FF_MB_DECISION_SIMPLE;
+static int ildct_cmp = FF_CMP_VSAD;
  static int mb_cmp = FF_CMP_SAD;
  static int sub_cmp = FF_CMP_SAD;
  static int cmp = FF_CMP_SAD;
@@ -1639,7 +1640,7 @@ static void opt_video_bitrate_min(const char *arg)
  
  static void opt_video_buffer_size(const char *arg)
  {
-    video_rc_buffer_size = atoi(arg) * 1024;
+    video_rc_buffer_size = atoi(arg) * 8*1024;
  }
  
  static void opt_video_rc_eq(char *arg)
@@ -1841,6 +1842,11 @@ static void opt_mb_cmp(const char *arg)
      mb_cmp = atoi(arg);
  }
  
+static void opt_ildct_cmp(const char *arg) /* handler for the "ildctcmp" option */
+{
+    ildct_cmp = atoi(arg); /* expected to be an FF_CMP_* number; not range-checked here */
+}
+
  static void opt_sub_cmp(const char *arg)
  {
      sub_cmp = atoi(arg);
@@ -2372,6 +2378,7 @@ static void opt_output_file(const char *filename)
  
                  video_enc->mb_decision = mb_decision;
                  video_enc->mb_cmp = mb_cmp;
+                video_enc->ildct_cmp = ildct_cmp;
                  video_enc->me_sub_cmp = sub_cmp;
                  video_enc->me_cmp = cmp;
                  
@@ -3000,7 +3007,7 @@ const OptionDef options[] = {
      { "bt", HAS_ARG | OPT_VIDEO, {(void*)opt_video_bitrate_tolerance}, "set video bitrate tolerance (in kbit/s)", "tolerance" },
      { "maxrate", HAS_ARG | OPT_VIDEO, {(void*)opt_video_bitrate_max}, "set max video bitrate tolerance (in kbit/s)", "bitrate" },
      { "minrate", HAS_ARG | OPT_VIDEO, {(void*)opt_video_bitrate_min}, "set min video bitrate tolerance (in kbit/s)", "bitrate" },
-    { "bufsize", HAS_ARG | OPT_VIDEO, {(void*)opt_video_buffer_size}, "set ratecontrol buffere size (in kbit)", "size" },
+    { "bufsize", HAS_ARG | OPT_VIDEO, {(void*)opt_video_buffer_size}, "set ratecontrol buffere size (in kByte)", "size" },
      { "vcodec", HAS_ARG | OPT_VIDEO, {(void*)opt_video_codec}, "force video codec ('copy' to copy stream)", "codec" },
      { "me", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_motion_estimation}, "set motion estimation method", 
        "method" },
@@ -3012,6 +3019,7 @@ const OptionDef options[] = {
      { "hq", OPT_BOOL, {(void*)&mb_decision}, "activate high quality settings" },
      { "mbd", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_mb_decision}, "macroblock decision", "mode" },
      { "mbcmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_mb_cmp}, "macroblock compare function", "cmp function" },
+    { "ildctcmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_ildct_cmp}, "ildct compare function", "cmp function" },
      { "subcmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_sub_cmp}, "subpel compare function", "cmp function" },
      { "cmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_cmp}, "fullpel compare function", "cmp function" },
      { "4mv", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_4mv}, "use four motion vector by macroblock (MPEG4)" },
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h

index 55c9376..df6690a 100644 (file)
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -17,7 +17,7 @@ extern "C" {
  
  #define FFMPEG_VERSION_INT     0x000408
  #define FFMPEG_VERSION         "0.4.8"
-#define LIBAVCODEC_BUILD       4698
+#define LIBAVCODEC_BUILD       4699
  
  #define LIBAVCODEC_VERSION_INT FFMPEG_VERSION_INT
  #define LIBAVCODEC_VERSION     FFMPEG_VERSION
@@ -1196,6 +1196,12 @@ typedef struct AVCodecContext {
       * - decoding: unused
       */
      int mb_cmp;
+    /**
+     * interlaced dct compare function
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int ildct_cmp;
  #define FF_CMP_SAD  0
  #define FF_CMP_SSE  1
  #define FF_CMP_SATD 2
@@ -1204,6 +1210,8 @@ typedef struct AVCodecContext {
  #define FF_CMP_BIT  5
  #define FF_CMP_RD   6
  #define FF_CMP_ZERO 7
+#define FF_CMP_VSAD 8
+#define FF_CMP_VSSE 9
  #define FF_CMP_CHROMA 256
      
      /**
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c

index bc2ef8c..a6a4179 100644 (file)
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2560,6 +2560,53 @@ void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scant
      }
  }
  
+static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){ /* compare stub, arguments unused */
+    return 0; /* constant score; ff_set_cmp() installs this for FF_CMP_ZERO */
+}
+
+/**
+ * Fills cmp[0..4] with the compare functions selected by type.
+ * The switch does not depend on the entry index, so the source table is
+ * picked once and then copied; entries are copied exactly as stored in c.
+ *
+ * @param c    DSP context whose per-family tables (sad, sse, vsad, ...) are read
+ * @param cmp  destination array, exactly 5 entries are written
+ * @param type FF_CMP_* selector; only the low byte is examined, so flag bits
+ *             such as FF_CMP_CHROMA are ignored here
+ */
+void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
+    me_cmp_func *table= NULL; // family inside c to copy from
+    int i;
+
+    // size by the element type rather than assuming sizeof(void*) matches it
+    memset(cmp, 0, sizeof(*cmp)*5);
+
+    switch(type&0xFF){
+    case FF_CMP_SAD:  table= c->sad;            break;
+    case FF_CMP_SATD: table= c->hadamard8_diff; break;
+    case FF_CMP_SSE:  table= c->sse;            break;
+    case FF_CMP_DCT:  table= c->dct_sad;        break;
+    case FF_CMP_PSNR: table= c->quant_psnr;     break;
+    case FF_CMP_BIT:  table= c->bit;            break;
+    case FF_CMP_RD:   table= c->rd;             break;
+    case FF_CMP_VSAD: table= c->vsad;           break;
+    case FF_CMP_VSSE: table= c->vsse;           break;
+    case FF_CMP_ZERO:
+        for(i=0; i<5; i++){
+            cmp[i]= zero_cmp;
+        }
+        return;
+    default:
+        // unknown selector: cmp stays zeroed; report it once, not once per entry
+        av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
+        return;
+    }
+
+    for(i=0; i<5; i++){
+        cmp[i]= table[i];
+    }
+}
+
  /**
   * memset(blocks, 0, sizeof(DCTELEM)*6*64)
   */
@@ -2685,17 +2732,19 @@ if(sum>maxi){
      return sum;
  }
  
-static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
+static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
      int i;
      int temp[64];
      int sum=0;
-//FIXME OOOPS ignore 0 term instead of mean mess
+    
+    assert(h==8);
+    
      for(i=0; i<8; i++){
          //FIXME try pointer walks
-        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-mean,src[stride*i+1]-mean);
-        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-mean,src[stride*i+3]-mean);
-        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-mean,src[stride*i+5]-mean);
-        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-mean,src[stride*i+7]-mean);
+        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
+        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
+        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
+        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
          
          BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
          BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
@@ -2726,6 +2775,8 @@ static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
              +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
      }
      
+    sum -= ABS(temp[8*0] + temp[8*4]); // -mean
+    
      return sum;
  }
  
@@ -2911,7 +2962,69 @@ static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in
      return bits;
  }
  
+/* Intra vertical SAD: for a 16 pixel wide block, adds up |s[x] - s[x+stride]|
+ * for each of the first h-1 rows against the row below it. c and dummy are unused. */
+static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
+    int total=0;
+    int row, col;
+
+    for(row=h-1; row>0; row--){
+        for(col=0; col<16; col++)
+            total+= ABS(s[col] - s[col+stride]);
+        s+= stride;
+    }
+
+    return total;
+}
+
+/* Vertical SAD of the residual s1-s2 over a 16 wide block: for the first h-1
+ * rows, adds up |(s1-s2)[x] - (s1-s2)[x+stride]|. c is unused. */
+static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
+    int total=0;
+    int row, col;
+    for(row=1; row<h; row++, s1+=stride, s2+=stride){
+        for(col=0; col<16; col++){
+            const int cur = s1[col]        - s2[col];        // residual in this row
+            const int next= s1[col+stride] - s2[col+stride]; // residual one row down
+            total+= ABS(cur - next);
+        }
+    }
+    return total;
+}
+
+#define SQ(a) ((a)*(a))
+/* Intra vertical SSE: squares each difference s[x] - s[x+stride] and sums it
+ * over 16 columns and the first h-1 rows. c and dummy are unused. */
+static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
+    int total=0;
+    int row, col;
+    for(row=1; row<h; row++){
+        for(col=0; col<16; col++){
+            const int step= s[col] - s[col+stride]; // change to the pixel below
+            total+= step*step;
+        }
+        s+= stride;
+    }
+    return total;
+}
+
+/* Vertical SSE of the residual s1-s2: squares the row-to-row change of
+ * (s1[x] - s2[x]) over 16 columns and the first h-1 rows. c is unused. */
+static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
+    int total=0;
+    int row, col;
+
+    for(row=1; row<h; row++, s1+=stride, s2+=stride){
+        for(col=0; col<16; col++){
+            const int d= (s1[col] - s2[col]) - (s1[col+stride] - s2[col+stride]);
+            total+= d*d;
+        }
+    }
+    return total;
+}
+
  WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
+WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
  WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
  WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
  WARPER8_16_SQ(rd8x8_c, rd16_c)
@@ -3095,13 +3208,12 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
      c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
      c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
          
-    c->hadamard8_abs = hadamard8_abs_c;
-
  #define SET_CMP_FUNC(name) \
      c->name[0]= name ## 16_c;\
      c->name[1]= name ## 8x8_c;
      
      SET_CMP_FUNC(hadamard8_diff)
+    c->hadamard8_diff[4]= hadamard8_intra16_c;
      SET_CMP_FUNC(dct_sad)
      c->sad[0]= pix_abs16_c;
      c->sad[1]= pix_abs8_c;
@@ -3110,6 +3222,10 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
      SET_CMP_FUNC(quant_psnr)
      SET_CMP_FUNC(rd)
      SET_CMP_FUNC(bit)
+    c->vsad[0]= vsad16_c;
+    c->vsad[4]= vsad_intra16_c;
+    c->vsse[0]= vsse16_c;
+    c->vsse[4]= vsse_intra16_c;
          
      c->add_bytes= add_bytes_c;
      c->diff_bytes= diff_bytes_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h

index baaba2b..96eaac7 100644 (file)
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -138,21 +138,22 @@ typedef struct DSPContext {
      int (*pix_norm1)(uint8_t * pix, int line_size);
  // 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
      
-    me_cmp_func sad[4]; /* identical to pix_absAxA except additional void * */
-    me_cmp_func sse[4];
-    me_cmp_func hadamard8_diff[4];
-    me_cmp_func dct_sad[4];
-    me_cmp_func quant_psnr[4];
-    me_cmp_func bit[4];
-    me_cmp_func rd[4];
-    int (*hadamard8_abs )(uint8_t *src, int stride, int mean);
+    me_cmp_func sad[5]; /* identical to pix_absAxA except additional void * */
+    me_cmp_func sse[5];
+    me_cmp_func hadamard8_diff[5];
+    me_cmp_func dct_sad[5];
+    me_cmp_func quant_psnr[5];
+    me_cmp_func bit[5];
+    me_cmp_func rd[5];
+    me_cmp_func vsad[5];
+    me_cmp_func vsse[5];
  
      me_cmp_func me_pre_cmp[5];
      me_cmp_func me_cmp[5];
      me_cmp_func me_sub_cmp[5];
      me_cmp_func mb_cmp[5];
+    me_cmp_func ildct_cmp[5]; //only width 16 used
  
-    /* maybe create an array for 16/8/4/2 functions */
      /**
       * Halfpel motion compensation with rounding (a+b+1)>>1.
       * this is an array[4][4] of motion compensation funcions for 4 
@@ -293,6 +294,8 @@ void dsputil_init(DSPContext* p, AVCodecContext *avctx);
   */
  void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
  
+void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
+
  #define        BYTE_VEC32(c)   ((c)*0x01010101UL)
  
  static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c

index 341aa0a..c760f56 100644 (file)
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -22,6 +22,9 @@
  #include "../dsputil.h"
  #include "../simple_idct.h"
  
+//#undef NDEBUG
+//#include <assert.h>
+
  extern const uint8_t ff_h263_loop_filter_strength[32];
  
  int mm_flags; /* multimedia extension flags */
@@ -747,6 +750,246 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
      return tmp;
  }
  
+static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
+    int tmp;
+    /* MMX: sums |pix[x] - pix[x+line_size]| over 16 columns for successive row pairs; v and dummy are not read */
+    assert( (((int)pix) & 7) == 0);
+    assert((line_size &7) ==0);
+    /* SUM: load the next 16-byte row into out0/out1, step pix by line_size, add |row - in0/in1| to the words of mm6 */
+#define SUM(in0, in1, out0, out1) \
+      "movq (%0), %%mm2\n"\
+      "movq 8(%0), %%mm3\n"\
+      "addl %2,%0\n"\
+      "movq %%mm2, " #out0 "\n"\
+      "movq %%mm3, " #out1 "\n"\
+      "psubusb " #in0 ", %%mm2\n"\
+      "psubusb " #in1 ", %%mm3\n"\
+      "psubusb " #out0 ", " #in0 "\n"\
+      "psubusb " #out1 ", " #in1 "\n"\
+      "por %%mm2, " #in0 "\n"\
+      "por %%mm3, " #in1 "\n"\
+      "movq " #in0 ", %%mm2\n"\
+      "movq " #in1 ", %%mm3\n"\
+      "punpcklbw %%mm7, " #in0 "\n"\
+      "punpcklbw %%mm7, " #in1 "\n"\
+      "punpckhbw %%mm7, %%mm2\n"\
+      "punpckhbw %%mm7, %%mm3\n"\
+      "paddw " #in1 ", " #in0 "\n"\
+      "paddw %%mm3, %%mm2\n"\
+      "paddw %%mm2, " #in0 "\n"\
+      "paddw " #in0 ", %%mm6\n"
+
+    /* mm6 = running sum, mm7 = 0 for the byte->word unpacking, ecx = row counter loaded from h */
+  asm volatile (
+      "movl %3,%%ecx\n"
+      "pxor %%mm6,%%mm6\n"
+      "pxor %%mm7,%%mm7\n"
+      "movq (%0),%%mm0\n"
+      "movq 8(%0),%%mm1\n"
+      "addl %2,%0\n"
+      "subl $2, %%ecx\n"
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+      "1:\n"
+      /* two row pairs per iteration; mm0/mm1 and mm4/mm5 alternate as previous and current row */
+      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
+      
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+      /* NOTE(review): ecx starts at h-2 and must reach exactly 0, so the loop needs even h >= 4 */
+      "subl $2, %%ecx\n"
+      "jnz 1b\n"
+      /* fold the four 16-bit lanes of mm6 into the low word */
+      "movq %%mm6,%%mm0\n"
+      "psrlq $32, %%mm6\n"
+      "paddw %%mm6,%%mm0\n"
+      "movq %%mm0,%%mm6\n"
+      "psrlq $16, %%mm0\n"
+      "paddw %%mm6,%%mm0\n"
+      "movd %%mm0,%1\n"
+      : "+r" (pix), "=r"(tmp) 
+      : "r" (line_size) , "m" (h)
+      : "%ecx");
+    return tmp & 0xFFFF; /* only the low 16 bits hold the folded sum */
+}
+#undef SUM
+
+static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
+    int tmp;
+    /* MMX2 variant: same row-to-row absolute difference sum, using psadbw; v and dummy are not read */
+    assert( (((int)pix) & 7) == 0);
+    assert((line_size &7) ==0);
+    /* SUM: load the next 16-byte row into out0/out1, step pix by line_size, add psadbw(row, in0/in1) to mm6 */
+#define SUM(in0, in1, out0, out1) \
+      "movq (%0), " #out0 "\n"\
+      "movq 8(%0), " #out1 "\n"\
+      "addl %2,%0\n"\
+      "psadbw " #out0 ", " #in0 "\n"\
+      "psadbw " #out1 ", " #in1 "\n"\
+      "paddw " #in1 ", " #in0 "\n"\
+      "paddw " #in0 ", %%mm6\n"
+
+  asm volatile (
+      "movl %3,%%ecx\n"
+      "pxor %%mm6,%%mm6\n"
+      "pxor %%mm7,%%mm7\n" /* mm7 is cleared but not referenced again in this function */
+      "movq (%0),%%mm0\n"
+      "movq 8(%0),%%mm1\n"
+      "addl %2,%0\n"
+      "subl $2, %%ecx\n"
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+      "1:\n"
+      /* two row pairs per iteration; mm0/mm1 and mm4/mm5 alternate as previous and current row */
+      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
+      
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+      /* NOTE(review): ecx starts at h-2 and must reach exactly 0, so the loop needs even h >= 4 */
+      "subl $2, %%ecx\n"
+      "jnz 1b\n"
+      /* the total is read straight from the low 32 bits of mm6, no lane folding here */
+      "movd %%mm6,%1\n"
+      : "+r" (pix), "=r"(tmp) 
+      : "r" (line_size) , "m" (h)
+      : "%ecx");
+    return tmp;
+}
+#undef SUM
+
+static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
+    int tmp;
+    /* MMX: row-to-row absolute difference sum of the byte residual pix1-pix2, 16 columns wide; v is not read */
+    assert( (((int)pix1) & 7) == 0);
+    assert( (((int)pix2) & 7) == 0);
+    assert((line_size &7) ==0);
+    /* SUM: form the next residual row (psubb wraps, then xor with mm7) in out0/out1, add |row - in0/in1| to mm6 */
+#define SUM(in0, in1, out0, out1) \
+      "movq (%0),%%mm2\n"\
+      "movq (%1)," #out0 "\n"\
+      "movq 8(%0),%%mm3\n"\
+      "movq 8(%1)," #out1 "\n"\
+      "addl %3,%0\n"\
+      "addl %3,%1\n"\
+      "psubb " #out0 ", %%mm2\n"\
+      "psubb " #out1 ", %%mm3\n"\
+      "pxor %%mm7, %%mm2\n"\
+      "pxor %%mm7, %%mm3\n"\
+      "movq %%mm2, " #out0 "\n"\
+      "movq %%mm3, " #out1 "\n"\
+      "psubusb " #in0 ", %%mm2\n"\
+      "psubusb " #in1 ", %%mm3\n"\
+      "psubusb " #out0 ", " #in0 "\n"\
+      "psubusb " #out1 ", " #in1 "\n"\
+      "por %%mm2, " #in0 "\n"\
+      "por %%mm3, " #in1 "\n"\
+      "movq " #in0 ", %%mm2\n"\
+      "movq " #in1 ", %%mm3\n"\
+      "punpcklbw %%mm7, " #in0 "\n"\
+      "punpcklbw %%mm7, " #in1 "\n"\
+      "punpckhbw %%mm7, %%mm2\n"\
+      "punpckhbw %%mm7, %%mm3\n"\
+      "paddw " #in1 ", " #in0 "\n"\
+      "paddw %%mm3, %%mm2\n"\
+      "paddw %%mm2, " #in0 "\n"\
+      "paddw " #in0 ", %%mm6\n"
+
+    /* NOTE(review): mm7 is 0x80 in every byte here, so punpck* interleaves 0x80 high bytes - presumably they cancel mod 2^16; confirm */
+  asm volatile (
+      "movl %4,%%ecx\n"
+      "pxor %%mm6,%%mm6\n"
+      "pcmpeqw %%mm7,%%mm7\n" /* these three instructions leave 0x80 in each byte of mm7 */
+      "psllw $15, %%mm7\n"
+      "packsswb %%mm7, %%mm7\n"
+      "movq (%0),%%mm0\n"
+      "movq (%1),%%mm2\n"
+      "movq 8(%0),%%mm1\n"
+      "movq 8(%1),%%mm3\n"
+      "addl %3,%0\n"
+      "addl %3,%1\n"
+      "subl $2, %%ecx\n"
+      "psubb %%mm2, %%mm0\n"
+      "psubb %%mm3, %%mm1\n"
+      "pxor %%mm7, %%mm0\n"
+      "pxor %%mm7, %%mm1\n"
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+      "1:\n"
+      /* two row pairs per iteration; mm0/mm1 and mm4/mm5 alternate as previous and current residual row */
+      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
+      
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+      /* NOTE(review): ecx starts at h-2 and must reach exactly 0, so the loop needs even h >= 4 */
+      "subl $2, %%ecx\n"
+      "jnz 1b\n"
+      /* fold the four 16-bit lanes of mm6 into the low word */
+      "movq %%mm6,%%mm0\n"
+      "psrlq $32, %%mm6\n"
+      "paddw %%mm6,%%mm0\n"
+      "movq %%mm0,%%mm6\n"
+      "psrlq $16, %%mm0\n"
+      "paddw %%mm6,%%mm0\n"
+      "movd %%mm0,%2\n"
+      : "+r" (pix1), "+r" (pix2), "=r"(tmp) 
+      : "r" (line_size) , "m" (h)
+      : "%ecx");
+    return tmp & 0x7FFF; /* keeps 15 bits of the folded sum (the intra variant keeps 16) */
+}
+#undef SUM
+
+static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
+    int tmp;
+    /* MMX2 variant: row-to-row SAD of the byte residual pix1-pix2 via psadbw, 16 columns wide; v is not read */
+    assert( (((int)pix1) & 7) == 0);
+    assert( (((int)pix2) & 7) == 0);
+    assert((line_size &7) ==0);
+    /* SUM: form the next residual row (psubb wraps, then xor with mm7) in out0/out1, add psadbw(row, in0/in1) to mm6 */
+#define SUM(in0, in1, out0, out1) \
+      "movq (%0)," #out0 "\n"\
+      "movq (%1),%%mm2\n"\
+      "movq 8(%0)," #out1 "\n"\
+      "movq 8(%1),%%mm3\n"\
+      "addl %3,%0\n"\
+      "addl %3,%1\n"\
+      "psubb %%mm2, " #out0 "\n"\
+      "psubb %%mm3, " #out1 "\n"\
+      "pxor %%mm7, " #out0 "\n"\
+      "pxor %%mm7, " #out1 "\n"\
+      "psadbw " #out0 ", " #in0 "\n"\
+      "psadbw " #out1 ", " #in1 "\n"\
+      "paddw " #in1 ", " #in0 "\n"\
+      "paddw " #in0 ", %%mm6\n"
+
+  asm volatile (
+      "movl %4,%%ecx\n"
+      "pxor %%mm6,%%mm6\n"
+      "pcmpeqw %%mm7,%%mm7\n" /* these three instructions leave 0x80 in each byte of mm7 */
+      "psllw $15, %%mm7\n"
+      "packsswb %%mm7, %%mm7\n"
+      "movq (%0),%%mm0\n"
+      "movq (%1),%%mm2\n"
+      "movq 8(%0),%%mm1\n"
+      "movq 8(%1),%%mm3\n"
+      "addl %3,%0\n"
+      "addl %3,%1\n"
+      "subl $2, %%ecx\n"
+      "psubb %%mm2, %%mm0\n"
+      "psubb %%mm3, %%mm1\n"
+      "pxor %%mm7, %%mm0\n"
+      "pxor %%mm7, %%mm1\n"
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+      "1:\n"
+      /* two row pairs per iteration; mm0/mm1 and mm4/mm5 alternate as previous and current residual row */
+      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
+      
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+      /* NOTE(review): ecx starts at h-2 and must reach exactly 0, so the loop needs even h >= 4 */
+      "subl $2, %%ecx\n"
+      "jnz 1b\n"
+      /* the total is read straight from the low 32 bits of mm6, no lane folding here */
+      "movd %%mm6,%2\n"
+      : "+r" (pix1), "+r" (pix2), "=r"(tmp) 
+      : "r" (line_size) , "m" (h)
+      : "%ecx");
+    return tmp;
+}
+#undef SUM
+
  static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
      int i=0;
      asm volatile(
@@ -1874,6 +2117,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
          
         c->pix_norm1 = pix_norm1_mmx;
         c->sse[0] = sse16_mmx;
+        c->vsad[4]= vsad_intra16_mmx;
+
+        if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+            c->vsad[0] = vsad16_mmx;
+        }
  #endif //CONFIG_ENCODERS
  
          c->h263_v_loop_filter= h263_v_loop_filter_mmx;
@@ -1897,6 +2145,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
  #ifdef CONFIG_ENCODERS
              c->hadamard8_diff[0]= hadamard8_diff16_mmx2;
              c->hadamard8_diff[1]= hadamard8_diff_mmx2;
+            c->vsad[4]= vsad_intra16_mmx2;
  #endif //CONFIG_ENCODERS
  
              if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
@@ -1906,6 +2155,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
                  c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
                  c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
                  c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
+                c->vsad[0] = vsad16_mmx2;
              }
  
  #if 1
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c

index cfdbea9..42d0055 100644 (file)
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -277,49 +277,6 @@ if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*yma
  #undef INIT
  #undef CMP__DIRECT
  
-
-static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
-    return 0;
-}
-
-static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){
-    DSPContext* c= &s->dsp;
-    int i;
-    
-    memset(cmp, 0, sizeof(void*)*5);
-        
-    for(i=0; i<4; i++){
-        switch(type&0xFF){
-        case FF_CMP_SAD:
-            cmp[i]= c->sad[i];
-            break;
-        case FF_CMP_SATD:
-            cmp[i]= c->hadamard8_diff[i];
-            break;
-        case FF_CMP_SSE:
-            cmp[i]= c->sse[i];
-            break;
-        case FF_CMP_DCT:
-            cmp[i]= c->dct_sad[i];
-            break;
-        case FF_CMP_PSNR:
-            cmp[i]= c->quant_psnr[i];
-            break;
-        case FF_CMP_BIT:
-            cmp[i]= c->bit[i];
-            break;
-        case FF_CMP_RD:
-            cmp[i]= c->rd[i];
-            break;
-        case FF_CMP_ZERO:
-            cmp[i]= zero_cmp;
-            break;
-        default:
-            av_log(s->avctx, AV_LOG_ERROR,"internal error in cmp function selection\n");
-        }
-    }
-}
-
  static inline int get_penalty_factor(MpegEncContext *s, int type){
      switch(type&0xFF){
      default:
@@ -340,10 +297,10 @@ static inline int get_penalty_factor(MpegEncContext *s, int type){
  }
  
  void ff_init_me(MpegEncContext *s){
-    set_cmp(s, s->dsp.me_pre_cmp, s->avctx->me_pre_cmp);
-    set_cmp(s, s->dsp.me_cmp, s->avctx->me_cmp);
-    set_cmp(s, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
-    set_cmp(s, s->dsp.mb_cmp, s->avctx->mb_cmp);
+    ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, s->avctx->me_pre_cmp);
+    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
+    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
+    ff_set_cmp(&s->dsp, s->dsp.mb_cmp, s->avctx->mb_cmp);
  
      if(s->flags&CODEC_FLAG_QPEL){
          if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
@@ -1783,6 +1740,10 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
          }
           //FIXME something smarter
          if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
+#if 0        
+        if(s->out_format == FMT_MPEG1)
+            type |= CANDIDATE_MB_TYPE_INTRA;
+#endif
      }
  
      s->mb_type[mb_y*s->mb_stride + mb_x]= type;
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c

index 6bd124c..d28023a 100644 (file)
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -973,6 +973,8 @@ int MPV_encode_init(AVCodecContext *avctx)
      s->progressive_frame= 
      s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
      
+    ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
+    
      ff_init_me(s);
  
  #ifdef CONFIG_ENCODERS
@@ -3168,71 +3170,6 @@ static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index
          av_log(s->avctx, AV_LOG_INFO, "warning, cliping %d dct coefficents to %d..%d\n", overflow, minlevel, maxlevel);
  }
  
-#if 0
-static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
-    int score=0;
-    int x,y;
-    
-    for(y=0; y<7; y++){
-        for(x=0; x<16; x+=4){
-            score+= ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride]) 
-                   +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
-        }
-        s+= stride;
-    }
-    
-    return score;
-}
-
-static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
-    int score=0;
-    int x,y;
-    
-    for(y=0; y<7; y++){
-        for(x=0; x<16; x++){
-            score+= ABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
-        }
-        s1+= stride;
-        s2+= stride;
-    }
-    
-    return score;
-}
-#else
-#define SQ(a) ((a)*(a))
-
-static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
-    int score=0;
-    int x,y;
-    
-    for(y=0; y<7; y++){
-        for(x=0; x<16; x+=4){
-            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) 
-                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
-        }
-        s+= stride;
-    }
-    
-    return score;
-}
-
-static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
-    int score=0;
-    int x,y;
-    
-    for(y=0; y<7; y++){
-        for(x=0; x<16; x++){
-            score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
-        }
-        s1+= stride;
-        s2+= stride;
-    }
-    
-    return score;
-}
-
-#endif
-
  #endif //CONFIG_ENCODERS
  
  /**
@@ -3352,16 +3289,20 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
          if(s->flags&CODEC_FLAG_INTERLACED_DCT){
              int progressive_score, interlaced_score;
  
-            progressive_score= pix_vcmp16x8(ptr, wrap_y  ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
-            interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y  , wrap_y*2);
-            
-            if(progressive_score > interlaced_score + 100){
-                s->interlaced_dct=1;
+            s->interlaced_dct=0;
+            progressive_score= s->dsp.ildct_cmp[4](s, ptr           , NULL, wrap_y, 8) 
+                              +s->dsp.ildct_cmp[4](s, ptr + wrap_y*8, NULL, wrap_y, 8) - 400;
+
+            if(progressive_score > 0){
+                interlaced_score = s->dsp.ildct_cmp[4](s, ptr           , NULL, wrap_y*2, 8) 
+                                  +s->dsp.ildct_cmp[4](s, ptr + wrap_y  , NULL, wrap_y*2, 8);
+                if(progressive_score > interlaced_score){
+                    s->interlaced_dct=1;
              
-                dct_offset= wrap_y;
-                wrap_y<<=1;
-            }else
-                s->interlaced_dct=0;
+                    dct_offset= wrap_y;
+                    wrap_y<<=1;
+                }
+            }
          }
          
         s->dsp.get_pixels(s->block[0], ptr                 , wrap_y);
@@ -3430,19 +3371,24 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
          
          if(s->flags&CODEC_FLAG_INTERLACED_DCT){
              int progressive_score, interlaced_score;
+
+            s->interlaced_dct=0;
+            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8) 
+                              +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
              
-            progressive_score= pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y  ) 
-                             + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y  );
-            interlaced_score = pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y*2)
-                             + pix_diff_vcmp16x8(ptr_y + wrap_y  , dest_y + wrap_y  , wrap_y*2);
+            if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
+
+            if(progressive_score>0){
+                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8) 
+                                  +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
              
-            if(progressive_score > interlaced_score + 600){
-                s->interlaced_dct=1;
+                if(progressive_score > interlaced_score){
+                    s->interlaced_dct=1;
              
-                dct_offset= wrap_y;
-                wrap_y<<=1;
-            }else
-                s->interlaced_dct=0;
+                    dct_offset= wrap_y;
+                    wrap_y<<=1;
+                }
+            }
          }
          
         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
diff --git a/tests/ffmpeg.regression.ref b/tests/ffmpeg.regression.ref

index 5ccc1da..321bc6c 100644 (file)
--- a/tests/ffmpeg.regression.ref
+++ b/tests/ffmpeg.regression.ref
@@ -5,9 +5,9 @@ stddev:  7.63 PSNR:30.47 bytes:7602176
  b588110bebb48b5a1815ac26d0f0c9cc *./data/a-mpeg2.mpg
  ddfa5c618dab54df0f47976ddd55d90f *./data/out.yuv
  stddev:  7.65 PSNR:30.44 bytes:7602176
-826f088b9b3d051642f51e05860c9738 *./data/a-mpeg2i.mpg
-af80cb3a57800a0870273f62697ba29f *./data/out.yuv
-stddev:  7.93 PSNR:30.13 bytes:7602176
+13336cffcba456ff4a7607b2a7e57b33 *./data/a-mpeg2i.mpg
+4c9701eb83ed81dd9a328af83d7d7c8a *./data/out.yuv
+stddev:  7.66 PSNR:30.43 bytes:7602176
  d0dc46dd831398237a690ebbeff18b64 *./data/a-msmpeg4v2.avi
  712aa6c959d1d90a78fe98657cbff19c *./data/out.yuv
  stddev:  8.11 PSNR:29.94 bytes:7602176
diff --git a/tests/rotozoom.regression.ref b/tests/rotozoom.regression.ref

index f9ced39..24402a2 100644 (file)
--- a/tests/rotozoom.regression.ref
+++ b/tests/rotozoom.regression.ref
@@ -5,9 +5,9 @@ stddev:  4.93 PSNR:34.25 bytes:7602176
  aa0f088777131d8ffb627e6ff37312ca *./data/a-mpeg2.mpg
  830e7d798089ea6213e0867fd7676fde *./data/out.yuv
  stddev:  4.95 PSNR:34.22 bytes:7602176
-aff7511e16a07314cac0489d3dbc4477 *./data/a-mpeg2i.mpg
-6199bac131333a8dba043e69b2071dd0 *./data/out.yuv
-stddev:  4.97 PSNR:34.19 bytes:7602176
+6da01fd0d910fbfcdc5b212ef3dd65cb *./data/a-mpeg2i.mpg
+1e21fd7ed53abf352f9ea8548afa80a3 *./data/out.yuv
+stddev:  4.96 PSNR:34.20 bytes:7602176
  14db391f167b52b21a983157b410affc *./data/a-msmpeg4v2.avi
  fc8881e0904af9491d5fa0163183954b *./data/out.yuv
  stddev:  5.29 PSNR:33.64 bytes:7602176
author	Michael Niedermayer <michaelni@gmx.at>
	Mon, 5 Jan 2004 22:57:07 +0000 (22:57 +0000)
committer	Michael Niedermayer <michaelni@gmx.at>
	Mon, 5 Jan 2004 22:57:07 +0000 (22:57 +0000)
ffmpeg.c		patch \| blob \| history
libavcodec/avcodec.h		patch \| blob \| history
libavcodec/dsputil.c		patch \| blob \| history
libavcodec/dsputil.h		patch \| blob \| history
libavcodec/i386/dsputil_mmx.c		patch \| blob \| history
libavcodec/motion_est.c		patch \| blob \| history
libavcodec/mpegvideo.c		patch \| blob \| history
tests/ffmpeg.regression.ref		patch \| blob \| history
tests/rotozoom.regression.ref		patch \| blob \| history