From 7fd643264a80dcde9c994237b2b39433d9ce96b3 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Tue, 9 Jul 2013 16:18:28 -0700 Subject: [PATCH] SSSE3 assembly for 4x4/8x8/16x16/32x32 H intra prediction. Change-Id: Iad70966b986f65259329070e258f76ef0af816b4 --- vp9/common/vp9_rtcd_defs.sh | 8 ++-- vp9/common/x86/vp9_intrapred_ssse3.asm | 87 ++++++++++++++++++++++++++++++++++ vp9/vp9_common.mk | 1 + 3 files changed, 92 insertions(+), 4 deletions(-) create mode 100644 vp9/common/x86/vp9_intrapred_ssse3.asm diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 505625b..0ecfa6e 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -64,7 +64,7 @@ prototype void vp9_d63_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, ui specialize vp9_d63_predictor_4x4 prototype void vp9_h_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col" -specialize vp9_h_predictor_4x4 +specialize vp9_h_predictor_4x4 ssse3 prototype void vp9_d117_predictor_4x4 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col" specialize vp9_d117_predictor_4x4 @@ -103,7 +103,7 @@ prototype void vp9_d63_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, ui specialize vp9_d63_predictor_8x8 prototype void vp9_h_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col" -specialize vp9_h_predictor_8x8 +specialize vp9_h_predictor_8x8 ssse3 prototype void vp9_d117_predictor_8x8 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col" specialize vp9_d117_predictor_8x8 @@ -142,7 +142,7 @@ prototype void vp9_d63_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, specialize vp9_d63_predictor_16x16 prototype void vp9_h_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col" -specialize vp9_h_predictor_16x16 +specialize vp9_h_predictor_16x16 ssse3 prototype void vp9_d117_predictor_16x16 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col" specialize vp9_d117_predictor_16x16 @@ -181,7 +181,7 @@ prototype void vp9_d63_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, specialize vp9_d63_predictor_32x32 prototype void vp9_h_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col" -specialize vp9_h_predictor_32x32 +specialize vp9_h_predictor_32x32 ssse3 prototype void vp9_d117_predictor_32x32 "uint8_t *ypred_ptr, ptrdiff_t y_stride, uint8_t *yabove_row, uint8_t *yleft_col" specialize vp9_d117_predictor_32x32 diff --git a/vp9/common/x86/vp9_intrapred_ssse3.asm b/vp9/common/x86/vp9_intrapred_ssse3.asm new file mode 100644 index 0000000..bc8ed5c --- /dev/null +++ b/vp9/common/x86/vp9_intrapred_ssse3.asm @@ -0,0 +1,87 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + +%include "third_party/x86inc/x86inc.asm" + +SECTION .text + +INIT_MMX ssse3 +cglobal h_predictor_4x4, 2, 4, 3, dst, stride, line, left + movifnidn leftq, leftmp + add leftq, 4 + mov lineq, -2 + pxor m0, m0 +.loop: + movd m1, [leftq+lineq*2 ] + movd m2, [leftq+lineq*2+1] + pshufb m1, m0 + pshufb m2, m0 + movd [dstq ], m1 + movd [dstq+strideq], m2 + lea dstq, [dstq+strideq*2] + inc lineq + jnz .loop + REP_RET + +INIT_MMX ssse3 +cglobal h_predictor_8x8, 2, 4, 3, dst, stride, line, left + movifnidn leftq, leftmp + add leftq, 8 + mov lineq, -4 + pxor m0, m0 +.loop: + movd m1, [leftq+lineq*2 ] + movd m2, [leftq+lineq*2+1] + pshufb m1, m0 + pshufb m2, m0 + movq [dstq ], m1 + movq [dstq+strideq], m2 + lea dstq, [dstq+strideq*2] + inc lineq + jnz .loop + REP_RET + +INIT_XMM ssse3 +cglobal h_predictor_16x16, 2, 4, 3, dst, stride, line, left + movifnidn leftq, leftmp + add leftq, 16 + mov lineq, -8 + pxor m0, m0 +.loop: + movd m1, [leftq+lineq*2 ] + movd m2, [leftq+lineq*2+1] + pshufb m1, m0 + pshufb m2, m0 + mova [dstq ], m1 + mova [dstq+strideq], m2 + lea dstq, [dstq+strideq*2] + inc lineq + jnz .loop + REP_RET + +INIT_XMM ssse3 +cglobal h_predictor_32x32, 2, 4, 3, dst, stride, line, left + movifnidn leftq, leftmp + add leftq, 32 + mov lineq, -16 + pxor m0, m0 +.loop: + movd m1, [leftq+lineq*2 ] + movd m2, [leftq+lineq*2+1] + pshufb m1, m0 + pshufb m2, m0 + mova [dstq ], m1 + mova [dstq +16], m1 + mova [dstq+strideq ], m2 + mova [dstq+strideq+16], m2 + lea dstq, [dstq+strideq*2] + inc lineq + jnz .loop + REP_RET diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 823b9d3..e17d4e3 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -82,6 +82,7 @@ VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_iwalsh_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_loopfilter_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_recon_sse2.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_intrapred_sse2.asm +VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_intrapred_ssse3.asm VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm ifeq ($(CONFIG_POSTPROC),yes) VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm -- 2.7.4