fixed an overflow in ssim calculation

author Jim Bankoski <jimbankoski@google.com>

Mon, 28 Mar 2011 23:39:05 +0000 (16:39 -0700)

committer Yaowu Xu <yaowu@google.com>

Thu, 7 Apr 2011 21:25:25 +0000 (14:25 -0700)
author Jim Bankoski <jimbankoski@google.com>
Mon, 28 Mar 2011 23:39:05 +0000 (16:39 -0700)
committer Yaowu Xu <yaowu@google.com>
Thu, 7 Apr 2011 21:25:25 +0000 (14:25 -0700)
diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c

index 64d67c6..c78be37 100644 (file)
--- a/vp8/encoder/ssim.c
+++ b/vp8/encoder/ssim.c
@@ -290,8 +290,8 @@ void ssim_parms_8x8_c
       }
  }
  
-const static long long c1 =  426148; // (256^2*(.01*255)^2
-const static long long c2 = 3835331; //(256^2*(.03*255)^2
+const static long long cc1 =  26634; // (64^2*(.01*255)^2
+const static long long cc2 = 239708; // (64^2*(.03*255)^2
  
  static double similarity
  (
@@ -303,10 +303,19 @@ static double similarity
      int count
  )
  {
-    long long ssim_n = (2*sum_s*sum_r+ c1)*(2*count*sum_sxr-2*sum_s*sum_r+c2);
+    long long ssim_n, ssim_d;
+    long long c1, c2;
  
-    long long ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
-            (count*sum_sq_s-sum_s*sum_s + count*sum_sq_r-sum_r*sum_r +c2) ;
+    //scale the constants by number of pixels
+    c1 = (cc1*count*count)>>12;
+    c2 = (cc2*count*count)>>12;
+
+    ssim_n = (2*sum_s*sum_r+ c1)*((long long) 2*count*sum_sxr-
+          (long long) 2*sum_s*sum_r+c2);
+
+    ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
+        ((long long)count*sum_sq_s-(long long)sum_s*sum_s +
+        (long long)count*sum_sq_r-(long long) sum_r*sum_r +c2) ;
  
      return ssim_n * 1.0 / ssim_d;
  }
@@ -332,18 +341,33 @@ long dssim(unsigned char *s,int sp, unsigned char *r,int rp,
             const vp8_variance_rtcd_vtable_t *rtcd)
  {
      unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
-    double ssim3;
-    long long ssim_n;
-    long long ssim_d;
+    long long ssim3;
+    long long ssim_n,ssim_n1,ssim_n2;
+    long long ssim_d,ssim_d1,ssim_d2;
+    long long ssim_t1,ssim_t2;
+    long long c1, c2;
+
+    // normalize by 256/64
+    c1 = cc1*16;
+    c2 = cc2*16;
  
      rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
-    ssim_n = (2*sum_s*sum_r+ c1)*(2*256*sum_sxr-2*sum_s*sum_r+c2);
+    ssim_n1 = (2*sum_s*sum_r+ c1);
  
-    ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
-            (256*sum_sq_s-sum_s*sum_s + 256*sum_sq_r-sum_r*sum_r +c2) ;
+    ssim_n2 =((long long) 2*256*sum_sxr-(long long) 2*sum_s*sum_r+c2);
  
-    ssim3 = 256 * (ssim_d-ssim_n) / ssim_d;
-    return (long)( 256*ssim3 * ssim3 );
+    ssim_d1 =((long long)sum_s*sum_s +(long long)sum_r*sum_r+c1);
+
+    ssim_d2 = (256 * (long long) sum_sq_s-(long long) sum_s*sum_s +
+                    (long long) 256*sum_sq_r-(long long) sum_r*sum_r +c2) ;
+
+    ssim_t1 = 256 - 256 * ssim_n1 / ssim_d1;
+    ssim_t2 = 256 - 256 * ssim_n2 / ssim_d2;
+
+    ssim3 = 256 *ssim_t1 * ssim_t2;
+    if(ssim3 <0 )
+        ssim3=0;
+    return (long)( ssim3  );
  }
  // TODO: (jbb) this 8x8 window might be too big + we may want to pick pixels
  // such that the window regions overlap block boundaries to penalize blocking
@@ -361,18 +385,20 @@ double vp8_ssim2
  )
  {
      int i,j;
-
+    int samples =0;
      double ssim_total=0;
  
-    // we can sample points as frequently as we like start with 1 per 8x8
-    for(i=0; i < height; i+=8, img1 += stride_img1*8, img2 += stride_img2*8)
+    // we can sample points as frequently as we like start with 1 per 4x4
+    for(i=0; i < height-8; i+=4, img1 += stride_img1*4, img2 += stride_img2*4)
      {
-        for(j=0; j < width; j+=8 )
+        for(j=0; j < width-8; j+=4 )
          {
-            ssim_total += ssim_8x8(img1, stride_img1, img2, stride_img2, rtcd);
+            double v = ssim_8x8(img1+j, stride_img1, img2+j, stride_img2, rtcd);
+            ssim_total += v;
+            samples++;
          }
      }
-    ssim_total /= (width/8 * height /8);
+    ssim_total /= samples;
      return ssim_total;
  
  }
@@ -405,4 +431,4 @@ double vp8_calc_ssim
      *weight = 1;
  
      return ssimv;
-}
+}
+\ No newline at end of file
diff --git a/vp8/encoder/x86/ssim_opt.asm b/vp8/encoder/x86/ssim_opt.asm

index c267cdb..d6cebf3 100644 (file)
--- a/vp8/encoder/x86/ssim_opt.asm
+++ b/vp8/encoder/x86/ssim_opt.asm
@@ -16,12 +16,12 @@
          paddusw         xmm14, xmm4  ; sum_r
          movdqa          xmm1, xmm3
          pmaddwd         xmm1, xmm1
-        paddq           xmm13, xmm1 ; sum_sq_s
+        paddd           xmm13, xmm1 ; sum_sq_s
          movdqa          xmm2, xmm4
          pmaddwd         xmm2, xmm2
-        paddq           xmm12, xmm2 ; sum_sq_r
+        paddd           xmm12, xmm2 ; sum_sq_r
          pmaddwd         xmm3, xmm4
-        paddq           xmm11, xmm3  ; sum_sxr
+        paddd           xmm11, xmm3  ; sum_sxr
  %endmacro
  
  ; Sum across the register %1 starting with q words
@@ -66,6 +66,7 @@ sym(vp8_ssim_parms_16x16_sse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 9
+    SAVE_XMM
      push        rsi
      push        rdi
      ; end prolog
@@ -115,19 +116,20 @@ NextRow:
      SUM_ACROSS_Q    xmm11
  
      mov             rdi,arg(4)
-    movq            [rdi], xmm15;
+    movd            [rdi], xmm15;
      mov             rdi,arg(5)
-    movq            [rdi], xmm14;
+    movd            [rdi], xmm14;
      mov             rdi,arg(6)
-    movq            [rdi], xmm13;
+    movd            [rdi], xmm13;
      mov             rdi,arg(7)
-    movq            [rdi], xmm12;
+    movd            [rdi], xmm12;
      mov             rdi,arg(8)
-    movq            [rdi], xmm11;
+    movd            [rdi], xmm11;
  
      ; begin epilog
      pop         rdi
      pop         rsi
+    RESTORE_XMM
      UNSHADOW_ARGS
      pop         rbp
      ret
@@ -154,6 +156,7 @@ sym(vp8_ssim_parms_8x8_sse3):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 9
+    SAVE_XMM
      push        rsi
      push        rdi
      ; end prolog
@@ -174,11 +177,8 @@ sym(vp8_ssim_parms_8x8_sse3):
  NextRow2:
  
      ;grab source and reference pixels
-    movq            xmm5, [rsi]
-    movq            xmm6, [rdi]
-
-    movdqa          xmm3, xmm5
-    movdqa          xmm4, xmm6
+    movq            xmm3, [rsi]
+    movq            xmm4, [rdi]
      punpcklbw       xmm3, xmm0 ; low_s
      punpcklbw       xmm4, xmm0 ; low_r
  
@@ -197,19 +197,20 @@ NextRow2:
      SUM_ACROSS_Q    xmm11
  
      mov             rdi,arg(4)
-    movq            [rdi], xmm15;
+    movd            [rdi], xmm15;
      mov             rdi,arg(5)
-    movq            [rdi], xmm14;
+    movd            [rdi], xmm14;
      mov             rdi,arg(6)
-    movq            [rdi], xmm13;
+    movd            [rdi], xmm13;
      mov             rdi,arg(7)
-    movq            [rdi], xmm12;
+    movd            [rdi], xmm12;
      mov             rdi,arg(8)
-    movq            [rdi], xmm11;
+    movd            [rdi], xmm11;
  
      ; begin epilog
      pop         rdi
      pop         rsi
+    RESTORE_XMM
      UNSHADOW_ARGS
      pop         rbp
      ret
author	Jim Bankoski <jimbankoski@google.com>
	Mon, 28 Mar 2011 23:39:05 +0000 (16:39 -0700)
committer	Yaowu Xu <yaowu@google.com>
	Thu, 7 Apr 2011 21:25:25 +0000 (14:25 -0700)
vp8/encoder/ssim.c		patch \| blob \| history
vp8/encoder/x86/ssim_opt.asm		patch \| blob \| history