2 ; Copyright (C) <2001> Jean-Christophe Hoelt <jeko@free.fr>
4 ; This library is free software; you can redistribute it and/or
5 ; modify it under the terms of the GNU Library General Public
6 ; License as published by the Free Software Foundation; either
7 ; version 2 of the License, or (at your option) any later version.
9 ; This library is distributed in the hope that it will be useful,
10 ; but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ; Library General Public License for more details.
14 ; You should have received a copy of the GNU Library General Public
15 ; License along with this library; if not, write to the
16 ; Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
17 ; Boston, MA 02110-1301, USA.
21 ;// 07/01/2001 : Changing FEMMS to EMMS : slower... but runs on Intel machines
22 ;// 03/01/2001 : WIDTH and HEIGHT are now variable
23 ;// 28/12/2000 : adding comments to the code, removing some useless lines
24 ;// 27/12/2000 : reducing memory access... improving performance by 20%
25 ;// coefficients are now on 1 byte
26 ;// 22/12/2000 : Changing data structure
27 ;// 16/12/2000 : AT&T version
28 ;// 14/12/2000 : unrolling loop
29 ;// 12/12/2000 : 64 bits memory access
35 .string "pos = %d\n\0" /* printf-style format string; its user is not visible in this excerpt */
44 .globl mmx_zoom ;// name of the function called from the C program
45 /* .extern coeffs ;// the transformation buffer */
46 .extern expix1,expix2 ;// the source and destination buffers
47 .extern mmx_zoom_size, zoom_width ;// size of the buffers
49 .extern brutS,brutD,buffratio,precalCoef,prevX,prevY ;// further external symbols (precalCoef is loaded below)
52 /* note: a / sqrtperte <=> a >> PERTEDEC */
;// NOTE(review): only a subset of the routine's lines is visible in this
;// excerpt (the label, several loads and the loop branch are not shown), so
;// the comments below describe the visible steps only.
;// Visible flow: read brutS.px / brutS.py, form px in esi and py in edi,
;// mask the low 4 bits to get coefh / coefv, load the address of precalCoef,
;// compute pos in esi, then weight pixels by the 1-byte coefficients c1..c4.
;// In the lane diagrams, b/v/r/a are the four bytes of a pixel
;// (v presumably stands for "vert", i.e. green -- TODO confirm).
72 ;// initialize mm7 to zero
75 movl mmx_zoom_size, %ecx /* ecx = mmx_zoom_size; ecx is the register compared with 0 at the end-of-loop test */
79 ;// esi <- new position
81 leal (%eax, %ecx, 8),%eax /* eax += ecx*8 (8-byte entries; the fields at +0 and +4 are read next) */
83 movl (%eax),%edx /* = brutS.px (brutSmypos) */
84 movl 4(%eax),%eax /* = brutS.py */
87 leal (%ebx, %ecx, 8),%ebx /* ebx += ecx*8 (same 8-byte stride as above) */
92 addl %edx,%esi /* esi = px */
94 /* eax already holds brutS.py = the new brutSmypos */
95 /* ebx points to brutD[myPos] */
100 addl %eax,%edi /* edi = py */
108 andl $15,%eax /* eax = coefh (low 4 bits kept) */
110 andl $15,%ebx /* ebx = coefv (low 4 bits kept) */
115 movl $precalCoef,%ebx /* ebx = address of precalCoef */
116 /* movd (%eax,%ebx),%mm6*/ /* mm6 = coeffs */
125 imull zoom_width,%edi /* edi *= zoom_width */
126 leal (%esi,%edi),%esi /* esi += edi */
133 /** after this computation, %esi = pos, %mm6 = coeffs **/
141 ;// fetch the first two pixels into mm0 and mm1
142 /* movq (%eax,%esi,4), %mm0 /* b1-v1-r1-a1-b2-v2-r2-a2 */
143 movq %mm0, %mm1 /* b1-v1-r1-a1-b2-v2-r2-a2 */
145 ;// unpack the first pixel
146 punpcklbw %mm7, %mm0 /* 00-b2-00-v2-00-r2-00-a2 */
148 movq %mm6, %mm5 /* ??-??-??-??-c4-c3-c2-c1 */
149 ;// unpack the 2nd pixel
150 punpckhbw %mm7, %mm1 /* 00-b1-00-v1-00-r1-00-a1 */
152 ;// extract the coefficients...
153 punpcklbw %mm5, %mm6 /* c4-c4-c3-c3-c2-c2-c1-c1 */
154 movq %mm6, %mm4 /* c4-c4-c3-c3-c2-c2-c1-c1 */
155 movq %mm6, %mm5 /* c4-c4-c3-c3-c2-c2-c1-c1 */
157 punpcklbw %mm5, %mm6 /* c2-c2-c2-c2-c1-c1-c1-c1 */
158 punpckhbw %mm5, %mm4 /* c4-c4-c4-c4-c3-c3-c3-c3 */
160 movq %mm6, %mm3 /* c2-c2-c2-c2-c1-c1-c1-c1 */
161 punpcklbw %mm7, %mm6 /* 00-c1-00-c1-00-c1-00-c1 */
162 punpckhbw %mm7, %mm3 /* 00-c2-00-c2-00-c2-00-c2 */
164 ;// multiply the pixels by the coefficients
165 pmullw %mm6, %mm0 /* c1*b2-c1*v2-c1*r2-c1*a2 */
166 pmullw %mm3, %mm1 /* c2*b1-c2*v1-c2*r1-c2*a1 */
169 ;// ...extract the last 2 coefficients
170 movq %mm4, %mm5 /* c4-c4-c4-c4-c3-c3-c3-c3 */
171 punpcklbw %mm7, %mm4 /* 00-c3-00-c3-00-c3-00-c3 */
172 punpckhbw %mm7, %mm5 /* 00-c4-00-c4-00-c4-00-c4 */
174 /* add the line length to esi */
177 ;// fetch the last 2 pixels
178 /* movq (%eax,%esi,4), %mm1*/
181 ;// unpack the pixels
185 ;// multiply by the coefficients
189 ;// add the obtained values to the final value
193 ;// divide by 256 (original note: "= 16+16+16+16"), then repack the final pixel
197 ;// advance to the next element
199 ;// store the result
201 /* movd %mm0,(%eax,%ecx,4)*/
204 ;// end-of-loop test (while loop)
205 cmpl $0, %ecx ;// 400x300