1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 ; Use of this source code is governed by a BSD-style license that can be
3 ; found in the LICENSE file.
5 %include "media/base/simd/media_export.asm"
6 %include "third_party/x86inc/x86inc.asm"
9 ; This file uses MMX instructions.
14 ;void LinearScaleYUVToRGB32Row_MMX_X64(const uint8* y_buf,
19 ; ptrdiff_t source_dx);
20 %define SYMBOL LinearScaleYUVToRGB32Row_MMX_X64
25 %assign stack_offset 0
26 extern mangle(kCoefficientsRgbY)
28 ; Parameters are in the following order:
36 PROLOGUE 6, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, COMPL
59 LOAD_SYM TABLEq, mangle(kCoefficientsRgbY)
61 imul WIDTHq, SOURCE_DXq ; source_width = width * source_dx
63 cmp SOURCE_DXq, 0x20000
65 mov Xq, 0x8000 ; x = 0.5 for 1/2 or less
72 movzx COMPLd, BYTE [Uq + INDEXq]
73 movzx COMPRd, BYTE [Uq + INDEXq + 1]
75 and FRACTIONq, 0x1fffe
76 imul COMPRq, FRACTIONq
77 xor FRACTIONq, 0x1fffe
78 imul COMPLq, FRACTIONq
81 movq mm0, [TABLEq + 2048 + 8 * COMPLq]
84 movzx COMPLd, BYTE [Vq + INDEXq]
85 movzx COMPRd, BYTE [Vq + INDEXq + 1]
86 ; Trick here to imul COMPL first then COMPR.
87 ; Saves two instructions. :)
88 imul COMPLq, FRACTIONq
89 xor FRACTIONq, 0x1fffe
90 imul COMPRq, FRACTIONq
93 paddsw mm0, [TABLEq + 4096 + 8 * COMPLq]
95 ; Interpolate first Y1.
96 lea INDEXq, [Xq + SOURCE_DXq] ; INDEXq now points to next pixel.
97 ; Xq points to current pixel.
100 movzx COMPLd, BYTE [Yq + Xq]
101 movzx COMPRd, BYTE [Yq + Xq + 1]
102 and FRACTIONq, 0xffff
103 imul COMPRq, FRACTIONq
104 xor FRACTIONq, 0xffff
105 imul COMPLq, FRACTIONq
108 movq mm1, [TABLEq + 8 * COMPLq]
110 ; Interpolate Y2 if available.
114 lea Xq, [INDEXq + SOURCE_DXq] ; Xq points to next pixel.
115 ; INDEXq points to current pixel.
116 mov FRACTIONq, INDEXq
118 movzx COMPLd, BYTE [Yq + INDEXq]
119 movzx COMPRd, BYTE [Yq + INDEXq + 1]
120 and FRACTIONq, 0xffff
121 imul COMPRq, FRACTIONq
122 xor FRACTIONq, 0xffff
123 imul COMPLq, FRACTIONq
126 movq mm2, [TABLEq + 8 * COMPLq]