2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 %include "vpx_ports/x86_abi_support.asm"
; Declare the five vp9_sad*_mmx symbols as global so they are visible to the linker.
; NOTE(review): sym() and PRIVATE are macros from the included
; x86_abi_support.asm -- presumably platform name decoration and symbol
; visibility; confirm against that file.
14 global sym(vp9_sad16x16_mmx) PRIVATE
15 global sym(vp9_sad8x16_mmx) PRIVATE
16 global sym(vp9_sad8x8_mmx) PRIVATE
17 global sym(vp9_sad4x4_mmx) PRIVATE
18 global sym(vp9_sad16x8_mmx) PRIVATE
20 ;unsigned int vp9_sad16x16_mmx(
21 ; unsigned char *src_ptr,
23 ; unsigned char *ref_ptr,
;
; Argument slots as read by the code below:
;   arg(0) = src_ptr     arg(1) = src_stride (read as a 32-bit value)
;   arg(2) = ref_ptr     arg(3) = ref_stride (read as a 32-bit value)
; Register roles after setup:
;   rsi = src_ptr, rdi = ref_ptr, rax = src_stride, rdx = ref_stride
; NOTE(review): the name suggests a sum of absolute differences over a
; 16x16 block, but the accumulation, loop label/counter and epilogue are
; not visible in this excerpt -- confirm against the full file.
25 sym(vp9_sad16x16_mmx):
; NOTE(review): macro from x86_abi_support.asm; presumably makes the 4
; arguments addressable through arg(n) -- confirm against its definition.
28 SHADOW_ARGS_TO_STACK 4
33 mov rsi, arg(0) ;src_ptr
34 mov rdi, arg(2) ;ref_ptr
; Strides are sign-extended from 32 bits so they can be used in 64-bit
; address arithmetic.
36 movsxd rax, dword ptr arg(1) ;src_stride
37 movsxd rdx, dword ptr arg(3) ;ref_stride
; Each 16-byte row is fetched as two 8-byte halves per pointer:
; mm0/mm2 hold the source row, mm1/mm3 the reference row.
48 movq mm0, QWORD PTR [rsi] ;src bytes 0..7
49 movq mm2, QWORD PTR [rsi+8] ;src bytes 8..15
51 movq mm1, QWORD PTR [rdi] ;ref bytes 0..7
52 movq mm3, QWORD PTR [rdi+8] ;ref bytes 8..15
; Branch back to the loop label while ZF is clear; the flag-setting
; instruction and the label definition are outside this excerpt.
86 jne .x16x16sad_mmx_loop
112 ;unsigned int vp9_sad8x16_mmx(
113 ; unsigned char *src_ptr,
115 ; unsigned char *ref_ptr,
;
; Argument slots as read by the code below:
;   arg(0) = src_ptr     arg(1) = src_stride (read as a 32-bit value)
;   arg(2) = ref_ptr     arg(3) = ref_stride (read as a 32-bit value)
; Register roles after setup:
;   rsi = src_ptr, rdi = ref_ptr, rax = src_stride, rdx = ref_stride
; NOTE(review): the accumulation, loop label/counter and epilogue are not
; visible in this excerpt -- confirm details against the full file.
117 sym(vp9_sad8x16_mmx):
; NOTE(review): macro from x86_abi_support.asm; presumably makes the 4
; arguments addressable through arg(n) -- confirm against its definition.
120 SHADOW_ARGS_TO_STACK 4
125 mov rsi, arg(0) ;src_ptr
126 mov rdi, arg(2) ;ref_ptr
; Strides are sign-extended from 32 bits for 64-bit address arithmetic.
128 movsxd rax, dword ptr arg(1) ;src_stride
129 movsxd rdx, dword ptr arg(3) ;ref_stride
; One 8-byte load per pointer: mm0 = source bytes, mm1 = reference bytes.
140 movq mm0, QWORD PTR [rsi] ;src bytes 0..7
141 movq mm1, QWORD PTR [rdi] ;ref bytes 0..7
; Branch back to the loop label while ZF is clear; the flag-setting
; instruction and the label definition are outside this excerpt.
161 jne .x8x16sad_mmx_loop
184 ;unsigned int vp9_sad8x8_mmx(
185 ; unsigned char *src_ptr,
187 ; unsigned char *ref_ptr,
;
; Argument slots as read by the code below:
;   arg(0) = src_ptr     arg(1) = src_stride (read as a 32-bit value)
;   arg(2) = ref_ptr     arg(3) = ref_stride (read as a 32-bit value)
; Register roles after setup:
;   rsi = src_ptr, rdi = ref_ptr, rax = src_stride, rdx = ref_stride
; NOTE(review): the entry label, accumulation, loop label/counter and
; epilogue are not visible in this excerpt -- confirm against the full file.
; NOTE(review): macro from x86_abi_support.asm; presumably makes the 4
; arguments addressable through arg(n) -- confirm against its definition.
192 SHADOW_ARGS_TO_STACK 4
197 mov rsi, arg(0) ;src_ptr
198 mov rdi, arg(2) ;ref_ptr
; Strides are sign-extended from 32 bits for 64-bit address arithmetic.
200 movsxd rax, dword ptr arg(1) ;src_stride
201 movsxd rdx, dword ptr arg(3) ;ref_stride
; One 8-byte load per pointer: mm0 = source bytes, mm1 = reference bytes.
210 movq mm0, QWORD PTR [rsi] ;src bytes 0..7
211 movq mm1, QWORD PTR [rdi] ;ref bytes 0..7
; Branch back to the loop label while ZF is clear; the flag-setting
; instruction and the label definition are outside this excerpt.
231 jne .x8x8sad_mmx_loop
254 ;unsigned int vp9_sad4x4_mmx(
255 ; unsigned char *src_ptr,
257 ; unsigned char *ref_ptr,
;
; Argument slots as read by the code below:
;   arg(0) = src_ptr     arg(1) = src_stride (read as a 32-bit value)
;   arg(2) = ref_ptr     arg(3) = ref_stride (read as a 32-bit value)
; Register roles after setup:
;   rsi = src_ptr, rdi = ref_ptr, rax = src_stride, rdx = ref_stride
; No loop branch appears in the visible lines of this routine; rows are
; loaded in two groups of two.
; NOTE(review): the entry label, the arithmetic between the load groups
; and the epilogue are not visible in this excerpt -- confirm against the
; full file.
; NOTE(review): macro from x86_abi_support.asm; presumably makes the 4
; arguments addressable through arg(n) -- confirm against its definition.
262 SHADOW_ARGS_TO_STACK 4
267 mov rsi, arg(0) ;src_ptr
268 mov rdi, arg(2) ;ref_ptr
; Strides are sign-extended from 32 bits for 64-bit address arithmetic.
270 movsxd rax, dword ptr arg(1) ;src_stride
271 movsxd rdx, dword ptr arg(3) ;ref_stride
; First group: 4 bytes from the current row and from the row one stride
; further, for both source and reference.
273 movd mm0, DWORD PTR [rsi] ;src row at rsi
274 movd mm1, DWORD PTR [rdi] ;ref row at rdi
276 movd mm2, DWORD PTR [rsi+rax] ;src row at rsi + src_stride
277 movd mm3, DWORD PTR [rdi+rdx] ;ref row at rdi + ref_stride
; Second group: same addressing pattern into mm4..mm7.
; NOTE(review): presumably rsi/rdi were advanced by two rows in the lines
; elided between the groups -- confirm against the full file.
299 movd mm4, DWORD PTR [rsi] ;src row at rsi
300 movd mm5, DWORD PTR [rdi] ;ref row at rdi
302 movd mm6, DWORD PTR [rsi+rax] ;src row at rsi + src_stride
303 movd mm7, DWORD PTR [rdi+rdx] ;ref row at rdi + ref_stride
343 ;unsigned int vp9_sad16x8_mmx(
344 ; unsigned char *src_ptr,
346 ; unsigned char *ref_ptr,
;
; Argument slots as read by the code below:
;   arg(0) = src_ptr     arg(1) = src_stride (read as a 32-bit value)
;   arg(2) = ref_ptr     arg(3) = ref_stride (read as a 32-bit value)
; Register roles after setup:
;   rsi = src_ptr, rdi = ref_ptr, rax = src_stride, rdx = ref_stride
; NOTE(review): only the argument setup and the loop branch are visible in
; this excerpt; the loads, accumulation, loop label/counter and epilogue
; must be checked against the full file.
348 sym(vp9_sad16x8_mmx):
; NOTE(review): macro from x86_abi_support.asm; presumably makes the 4
; arguments addressable through arg(n) -- confirm against its definition.
351 SHADOW_ARGS_TO_STACK 4
356 mov rsi, arg(0) ;src_ptr
357 mov rdi, arg(2) ;ref_ptr
; Strides are sign-extended from 32 bits for 64-bit address arithmetic.
359 movsxd rax, dword ptr arg(1) ;src_stride
360 movsxd rdx, dword ptr arg(3) ;ref_stride
; Branch back to the loop label while ZF is clear; the flag-setting
; instruction and the label definition are outside this excerpt.
407 jne .x16x8sad_mmx_loop