1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
11 #include "build/build_config.h"
12 #include "media/base/simd/filter_yuv.h"
16 #if defined(COMPILER_MSVC)
17 // Warning 4799 is about not calling emms before the function exits.
18 // We call emms at the frame level, so suppress this warning.
20 #pragma warning(disable: 4799)
// Blends two source rows into |dest| one byte at a time using fixed-point
// weights: dest[i] = (src0[i] * (256 - fraction) + src1[i] * fraction) >> 8.
// The work is split into three phases: a scalar prologue that runs until
// |dest| reaches an 8-byte boundary, an MMX loop that handles 8 bytes per
// iteration, and a scalar epilogue for whatever is left before |width|.
//
// NOTE(review): the full parameter list is not visible in this excerpt; the
// body reads |src0|, |src1|, |width| and |fraction| in addition to |dest|.
// NOTE(review): the 16-bit vector arithmetic below matches the scalar formula
// only if |fraction| is within 0..256 (so each weighted sum fits in an
// unsigned 16-bit lane) -- TODO confirm against callers.
23 void FilterYUVRows_MMX(uint8* dest,
30 // Process the unaligned bytes first.
// The expression below yields the number of bytes (0..7) between |dest| and
// the next 8-byte boundary; presumably it initializes |unaligned_width|, which
// the loop condition that follows compares against.
32 (8 - (reinterpret_cast<uintptr_t>(dest) & 7)) & 7;
// Scalar prologue: also bounded by |width| so short rows never overrun.
33 while (pixel < width && pixel < unaligned_width) {
34 dest[pixel] = (src0[pixel] * (256 - fraction) +
35 src1[pixel] * fraction) >> 8;
// Vector constants: an all-zero register used to widen bytes to 16-bit lanes,
// and the two blend weights broadcast to every 16-bit lane.
39 __m64 zero = _mm_setzero_si64();
40 __m64 src1_fraction = _mm_set1_pi16(fraction);
41 __m64 src0_fraction = _mm_set1_pi16(256 - fraction);
// View the rows as 64-bit chunks starting at the current pixel offset. Only
// |dest| was aligned by the prologue; nothing visible here aligns the sources.
42 const __m64* src0_64 = reinterpret_cast<const __m64*>(src0 + pixel);
43 const __m64* src1_64 = reinterpret_cast<const __m64*>(src1 + pixel);
44 __m64* dest64 = reinterpret_cast<__m64*>(dest + pixel);
// End of the vectorizable region: the address of dest + width rounded down to
// a multiple of 8, so the loop below never touches bytes past |width|.
45 __m64* end64 = reinterpret_cast<__m64*>(
46 reinterpret_cast<uintptr_t>(dest + width) & ~7);
// Main loop: 8 pixels per iteration.
48 while (dest64 < end64) {
// Load 8 bytes from each row. These locals reuse the names src0/src1 that the
// scalar loops index as byte rows.
49 __m64 src0 = *src0_64++;
50 __m64 src1 = *src1_64++;
// Widen to 16 bits by interleaving with zero: src2/src3 take the high four
// bytes, src0/src1 are then overwritten with the low four bytes.
51 __m64 src2 = _mm_unpackhi_pi8(src0, zero);
52 __m64 src3 = _mm_unpackhi_pi8(src1, zero);
53 src0 = _mm_unpacklo_pi8(src0, zero);
54 src1 = _mm_unpacklo_pi8(src1, zero);
// Apply the weights; only the low 16 bits of each product are kept.
55 src0 = _mm_mullo_pi16(src0, src0_fraction);
56 src1 = _mm_mullo_pi16(src1, src1_fraction);
57 src2 = _mm_mullo_pi16(src2, src0_fraction);
58 src3 = _mm_mullo_pi16(src3, src1_fraction);
// Sum the two weighted rows, then shift right by 8 (logical shift) to divide
// by 256, mirroring the ">> 8" in the scalar loops.
59 src0 = _mm_add_pi16(src0, src1);
60 src2 = _mm_add_pi16(src2, src3);
61 src0 = _mm_srli_pi16(src0, 8);
62 src2 = _mm_srli_pi16(src2, 8);
// Narrow the eight 16-bit results back to bytes (low half from src0, high
// half from src2) with unsigned saturation.
// NOTE(review): the store through |dest64| and the matching advance of
// |pixel| are not visible in this excerpt -- presumably they follow here.
63 src0 = _mm_packs_pu16(src0, src2);
// Scalar epilogue: handle the remaining pixels that do not fill a full 8-byte
// chunk, using the same formula as the prologue.
68 while (pixel < width) {
69 dest[pixel] = (src0[pixel] * (256 - fraction) +
70 src1[pixel] * fraction) >> 8;
75 #if defined(COMPILER_MSVC)