2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
11 %include "vpx_ports/x86_abi_support.asm"
13 ; TABULATE_SSIM - accumulate the five running SSIM sums:
; sum_s, sum_r, sum_sq_s, sum_sq_r and sum_sxr.
;
; Accumulator registers (as used by the visible instructions):
;   xmm15 = sum_s     xmm14 = sum_r
;   xmm13 = sum_sq_s  xmm12 = sum_sq_r   xmm11 = sum_sxr
; Inputs read here: xmm3 (s values), xmm4 (r values), xmm1, xmm2.
;
; NOTE(review): xmm3 feeds both sum_s and sum_sxr below, and nothing visible
; in this excerpt fills xmm1/xmm2. Presumably the instructions that square
; s and r and form the s*r product sit between these adds -- confirm against
; the complete macro body before relying on this description.
14 %macro TABULATE_SSIM 0
15 paddusw xmm15, xmm3 ; sum_s: unsigned saturating 16-bit add of the s lanes
16 paddusw xmm14, xmm4 ; sum_r: unsigned saturating 16-bit add of the r lanes
19 paddd xmm13, xmm1 ; sum_sq_s: 32-bit lane add of xmm1
22 paddd xmm12, xmm2 ; sum_sq_r: 32-bit lane add of xmm2
24 paddd xmm11, xmm3 ; sum_sxr: 32-bit lane add of xmm3
27 ; Sum across the register %1 starting with q words
39 ; Sum across the register %1 starting with q words
47 ;void ssim_parms_sse2(
52 ; unsigned long *sum_s,
53 ; unsigned long *sum_r,
54 ; unsigned long *sum_sq_s,
55 ; unsigned long *sum_sq_r,
56 ; unsigned long *sum_sxr);
58 ; TODO: Use parm passing through a structure; we probably don't need the pxors
59 ; (the calling app will initialize to 0). We could easily fit everything in sse2
60 ; without too much hassle, and can probably do better estimates with psadbw
61 ; or pavgb. At this point this is just meant to be a first pass for calculating
62 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion
63 ; in mode selection code.
; Entry point for the 16x16 SSIM parameter routine. It clears five
; accumulators (sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr), then walks 16 rows
; of the source (s) and reference (r) blocks.
;
; NOTE(review): only part of the routine is visible in this excerpt. The pixel
; loads, the loop label/branch, the final reductions, the stores to the output
; pointers and the epilogue/ret are not shown here.
; NOTE(review): xmm11-xmm15 are callee-saved under the Microsoft x64 ABI; no
; save/restore is visible in this excerpt -- confirm it exists in the full file.
64 global sym(vp9_ssim_parms_16x16_sse2) PRIVATE
65 sym(vp9_ssim_parms_16x16_sse2):
; SHADOW_ARGS_TO_STACK comes from vpx_ports/x86_abi_support.asm; presumably it
; makes the 9 arguments addressable from the stack frame -- TODO confirm.
68 SHADOW_ARGS_TO_STACK 9
; Zero the accumulators (pxor of a register with itself yields 0).
80 pxor xmm15,xmm15 ;sum_s
81 pxor xmm14,xmm14 ;sum_r
82 pxor xmm13,xmm13 ;sum_sq_s
83 pxor xmm12,xmm12 ;sum_sq_r
84 pxor xmm11,xmm11 ;sum_sxr
86 mov rdx, 16 ;row counter: 16 rows in the block
89 ;grab source and reference pixels
; Interleave the upper 8 bytes of each row with xmm0. Assuming xmm0 is zero
; (its setup is not visible here -- TODO confirm), this widens bytes to words.
94 punpckhbw xmm3, xmm0 ; high_s
95 punpckhbw xmm4, xmm0 ; high_r
; Same widening step for the lower 8 bytes of each row.
101 punpcklbw xmm3, xmm0 ; low_s
102 punpcklbw xmm4, xmm0 ; low_r
; Advance both row pointers; rcx and rax presumably hold the s and r strides.
106 add rsi, rcx ; next s row
107 add rdi, rax ; next r row
137 ;void ssim_parms_sse2(
142 ; unsigned long *sum_s,
143 ; unsigned long *sum_r,
144 ; unsigned long *sum_sq_s,
145 ; unsigned long *sum_sq_r,
146 ; unsigned long *sum_sxr);
148 ; TODO: Use parm passing through a structure; we probably don't need the pxors
149 ; (the calling app will initialize to 0). We could easily fit everything in sse2
150 ; without too much hassle, and can probably do better estimates with psadbw
151 ; or pavgb. At this point this is just meant to be a first pass for calculating
152 ; all the parms needed for 8x8 ssim so we can play with dssim as distortion
153 ; in mode selection code.
154 global sym(vp9_ssim_parms_8x8_sse2) PRIVATE
155 sym(vp9_ssim_parms_8x8_sse2):
158 SHADOW_ARGS_TO_STACK 9
170 pxor xmm15,xmm15 ;sum_s
171 pxor xmm14,xmm14 ;sum_r
172 pxor xmm13,xmm13 ;sum_sq_s
173 pxor xmm12,xmm12 ;sum_sq_r
174 pxor xmm11,xmm11 ;sum_sxr
176 mov rdx, 8 ;row counter
179 ;grab source and reference pixels
182 punpcklbw xmm3, xmm0 ; low_s
183 punpcklbw xmm4, xmm0 ; low_r
187 add rsi, rcx ; next s row
188 add rdi, rax ; next r row