/* TAGLLBL(a): wraps a loop/branch label name before handing it to TAG().
 * When neither NASM_ASSEMBLER nor MASM_ASSEMBLER is defined, ".L" is
 * token-pasted in front of the name. */
2 #if !defined(NASM_ASSEMBLER) && !defined(MASM_ASSEMBLER)
3 #define TAGLLBL(a) TAG(.L##a)
/* Alternative definition: the label name is passed to TAG() unchanged.
 * NOTE(review): the #else / #endif separating the two definitions are not
 * visible in this view -- confirm against the full file. */
5 #define TAGLLBL(a) TAG(a)
/* Byte offsets of the fields of one vertex record. The routines in this file
 * use them as REGOFF displacements from the vertex pointer held in ECX.
 * Two alternative layouts follow, each listing the same nine fields at
 * offsets 0..32 in steps of 4.
 * NOTE(review): the preprocessor conditional that selects between the two
 * layouts is not visible in this view -- confirm against the full file. */
/* Layout 1: X, Y, Z, R, G, B, OOZ, A, OOW. */
10 #define GR_VERTEX_X_OFFSET 0
11 #define GR_VERTEX_Y_OFFSET 4
12 #define GR_VERTEX_Z_OFFSET 8
13 #define GR_VERTEX_R_OFFSET 12
14 #define GR_VERTEX_G_OFFSET 16
15 #define GR_VERTEX_B_OFFSET 20
16 #define GR_VERTEX_OOZ_OFFSET 24
17 #define GR_VERTEX_A_OFFSET 28
18 #define GR_VERTEX_OOW_OFFSET 32
/* Layout 2: X, Y, OOZ, OOW, R, G, B, A, Z. */
22 #define GR_VERTEX_X_OFFSET 0
23 #define GR_VERTEX_Y_OFFSET 4
24 #define GR_VERTEX_OOZ_OFFSET 8
25 #define GR_VERTEX_OOW_OFFSET 12
26 #define GR_VERTEX_R_OFFSET 16
27 #define GR_VERTEX_G_OFFSET 20
28 #define GR_VERTEX_B_OFFSET 24
29 #define GR_VERTEX_A_OFFSET 28
30 #define GR_VERTEX_Z_OFFSET 32
/* Per-texture-unit fields: SOW, TOW, OOW for TMU0 at 36..44 and for TMU1 at
 * 48..56. The projection loops store an (s, t) pair as one 8-byte MOVQ at the
 * SOW offset, which relies on TOW sitting 4 bytes after SOW as defined here.
 * Presumably shared by both layouts above -- confirm. */
34 #define GR_VERTEX_SOW_TMU0_OFFSET 36
35 #define GR_VERTEX_TOW_TMU0_OFFSET 40
36 #define GR_VERTEX_OOW_TMU0_OFFSET 44
37 #define GR_VERTEX_SOW_TMU1_OFFSET 48
38 #define GR_VERTEX_TOW_TMU1_OFFSET 52
39 #define GR_VERTEX_OOW_TMU1_OFFSET 56
44 /*#define MAT_SX 0 /* accessed by REGIND !! */
54 /* Do viewport map, device scale and perspective projection.
56 * void project_verts( GLfloat *first,
62 * Rearrange fxVertices to look like grVertices.
65 GLOBL GLNAME( TAG(fx_3dnow_project_vertices) )
66 GLNAME( TAG(fx_3dnow_project_vertices) ):
70 MOV_L ( REGOFF(8, ESP), ECX ) /* first_vert */
/* Overview of fx_3dnow_project_vertices (entry label above).
 *
 * Walks the vertex records from first_vert towards last_vert, advancing by
 * stride bytes, and rewrites each record in place: x and y are multiplied by
 * oow = 1/f[3], scaled and translated by the matrix, z likewise, and the
 * result is stored at the GR_VERTEX_* offsets of the same record.
 *
 * Arguments are read at 8, 12, 16 and 20 bytes above ESP (first_vert,
 * last_vert, matrix, stride). The instructions that set up the stack before
 * this point, and the epilogue, are not visible in this view.
 *
 * Register use in the visible code:
 *   ECX  current vertex pointer      EDX  last_vert
 *   EBP  matrix pointer              EAX  stride in bytes
 *   MM1  vsz                         MM4  snapper | snapper
 *   MM5  vsy | vsx                   MM6  ty+snapper | tx+snapper
 *   MM7  oow | oow for the current vertex; MM0, MM2, MM3 are scratch
 *
 * The #endif lines closing the TYPE-conditional sections are not visible in
 * this view. */
71 MOV_L ( REGOFF(12, ESP), EDX ) /* last_vert */
/* Branch to the end label when the flags say "equal".
 * NOTE(review): the instruction that sets those flags is not visible in this
 * view; presumably a first_vert/last_vert compare -- confirm. */
74 JE ( TAGLLBL(FXPV_end) )
78 PREFETCH ( REGIND(ECX) ) /* fetch the first vertex */
80 MOV_L ( REGOFF(16, ESP), EBP ) /* matrix */
81 MOV_L ( REGOFF(20, ESP), EAX ) /* stride */
/* Build the x/y translation pair in MM6: low dword tx, high dword ty. */
83 MOVD ( REGOFF(MAT_TX, EBP), MM6 ) /* | tx */
84 PUNPCKLDQ ( REGOFF(MAT_TY, EBP), MM6 ) /* ty | tx */
/* Write the snapper constant twice into an 8-byte scratch slot so it can be
 * loaded as a packed pair. 0x49400000 is the IEEE-754 single-precision bit
 * pattern of 786432.0 (1.5 * 2^19); floats of that magnitude are spaced 1/16
 * apart, so adding it here and subtracting it after the transform presumably
 * snaps x and y to 1/16 steps.
 * NOTE(review): the slot lies below ESP and no stack-pointer adjustment is
 * visible in this view, so anything that uses this stack asynchronously
 * (for example a signal handler) could overwrite it before the MOVQ below
 * reads it -- confirm against the full file. */
87 MOV_L ( CONST(0x49400000), REGOFF(-8, ESP) ) /* snapper */
88 MOV_L ( CONST(0x49400000), REGOFF(-4, ESP) ) /* snapper */
91 MOVQ ( REGOFF(-8, ESP), MM4 ) /* snapper | snapper */
/* Fold the snapper into the translation once, outside the loop. */
92 PFADD ( MM4, MM6 ) /* ty+snapper | tx+snapper */
/* Build the x/y scale pair in MM5. The x scale is read from offset 0 of the
 * matrix, which is why it is fetched with REGIND instead of a MAT_SX
 * displacement. */
94 MOVD ( REGIND(EBP), MM5 )
95 PUNPCKLDQ ( REGOFF(MAT_SY, EBP), MM5 ) /* vsy | vsx */
97 MOVD ( REGOFF(MAT_SZ, EBP), MM1 ) /* | vsz */
/* Per-vertex loop; ECX points at the record being processed. */
101 TAGLLBL(FXPV_loop_start):
103 PREFETCH ( REGOFF(64, ECX) ) /* fetch the next-ish vertex */
/* oow = 1 / f[3]: PFRCP produces the initial approximation in MM0, then
 * PFRCPIT1 / PFRCPIT2 refine it into MM7, which starts out holding f[3]. */
106 MOVD ( REGOFF(12, ECX), MM0 ) /* | f[3] */
107 PFRCP ( MM0, MM0 ) /* oow = 1/f[3] */
109 MOVD ( REGOFF(12, ECX), MM7 ) /* | f[3] */
110 PFRCPIT1 ( MM0, MM7 )
111 PFRCPIT2 ( MM0, MM7 ) /* oow | oow */
/* Duplicate the low dword of MM7 into its high dword so both halves hold
 * oow for the packed multiplies below. */
113 PUNPCKLDQ ( MM7, MM7 )
/* Colour: one dword is copied from CLIP_R to the R slot, without scaling. */
116 #if (TYPE & SETUP_RGBA)
117 MOVD ( REGOFF(CLIP_R, ECX ), MM0 ) /* f[RCOORD] = f[CLIP_R]; */
118 MOVD ( MM0, REGOFF(GR_VERTEX_R_OFFSET, ECX) )
/* Texture unit 1: load the (s, t) pair, multiply both by oow, store it as one
 * qword starting at the SOW slot. */
121 #if (TYPE & SETUP_TMU1)
122 MOVQ ( REGOFF(CLIP_S1, ECX), MM0 ) /* f[S1COORD] = f[CLIP_S1] * oow */
123 PFMUL ( MM7, MM0 ) /* f[T1COORD] = f[CLIP_T1] * oow */
124 MOVQ ( MM0, REGOFF(GR_VERTEX_SOW_TMU1_OFFSET, ECX) )
/* Texture unit 0: same treatment as unit 1. */
128 #if (TYPE & SETUP_TMU0)
129 MOVQ ( REGOFF(CLIP_S0, ECX), MM0 ) /* f[S0COORD] = f[CLIP_S0] * oow */
130 PFMUL ( MM7, MM0 ) /* f[T0COORD] = f[CLIP_T0] * oow */
131 MOVQ ( MM0, REGOFF(GR_VERTEX_SOW_TMU0_OFFSET, ECX) )
/* Position: (x, y) are handled as a packed pair in MM2, z alone in MM3.
 * Each is multiplied by oow, then by the matrix scale, then the matrix
 * translation is added. */
140 MOVQ ( REGIND(ECX), MM2 ) /* f[1] | f[0] */
141 PFMUL ( MM7, MM2 ) /* f[1] * oow | f[0] * oow */
143 MOVD ( REGOFF(8, ECX), MM3 ) /* | f[2] */
144 PFMUL ( MM7, MM3 ) /* | f[2] * oow */
146 MOVD ( REGOFF(MAT_TZ, EBP), MM0 ) /* | vtz */
147 PFMUL ( MM1, MM3 ) /* | f[2] *= vsz */
149 PFADD ( MM0, MM3 ) /* | f[2] += vtz */
150 PFMUL ( MM5, MM2 ) /* f[1] *= vsy | f[0] *= vsx */
/* MM6 already contains the snapper, so this add applies translation and
 * snapper together; the PFSUB that follows takes the snapper back out. */
152 PFADD ( MM6, MM2 ) /* f[1] += vty | f[0] += vtx */
155 PFSUB ( MM4, MM2 ) /* f[0,1] -= snapper */
/* Store x and y as one qword at the X slot, and z as a dword at the OOZ
 * slot. */
158 MOVQ ( MM2, REGOFF(GR_VERTEX_X_OFFSET, ECX) )
159 MOVD ( MM3, REGOFF(GR_VERTEX_OOZ_OFFSET, ECX) )
162 /* end of DO_SETUP_XYZ */
164 MOVD ( MM7, REGOFF(GR_VERTEX_OOW_OFFSET, ECX) ) /* f[OOWCOORD] = oow */
165 ADD_L ( EAX, ECX ) /* f += stride */
/* Operands are written source-first as in the loads above, so the JA is
 * taken while EDX (last_vert) is above ECX as unsigned addresses. */
167 CMP_L ( ECX, EDX ) /* stall??? */
168 JA ( TAGLLBL(FXPV_loop_start) )
181 /* void project_verts( GLfloat *first,
185 * const GLubyte *mask )
189 GLOBL GLNAME( TAG(fx_3dnow_project_clipped_vertices) )
190 GLNAME( TAG(fx_3dnow_project_clipped_vertices) ):
194 MOV_L ( REGOFF(8, ESP), ECX ) /* first FXDRIVER(VB)->verts*/
/* Overview of fx_3dnow_project_clipped_vertices (entry label above).
 *
 * Same per-vertex projection as fx_3dnow_project_vertices, but one byte of a
 * clip mask is tested per vertex: when the byte is non-zero the code
 * branches to the skip label instead of transforming the record.
 *
 * Arguments are read at 8, 12, 24, 28 and 32 bytes above ESP (first, last,
 * matrix, stride, clip mask). Nothing in the visible code reads the slots at
 * 16 and 20. The instructions that set up the stack before this point, the
 * skip label and the epilogue are not visible in this view.
 *
 * Register use in the visible code:
 *   ECX  current vertex pointer      EDX  last vertex pointer
 *   EBP  matrix pointer              EAX  stride in bytes
 *   ESI  pointer to the clip-mask byte of the current vertex
 *   MM1  vsz                         MM4  snapper | snapper
 *   MM5  vsy | vsx                   MM6  ty+snapper | tx+snapper
 *   MM7  oow | oow for the current vertex; MM0, MM2, MM3 are scratch
 *
 * The #endif lines closing the TYPE-conditional sections are not visible in
 * this view. */
195 MOV_L ( REGOFF(12, ESP), EDX ) /* last FXDRIVER(VB)->last_vert */
202 PREFETCH ( REGIND(ECX) ) /* fetch the first vertex */
204 MOV_L ( REGOFF(24, ESP), EBP ) /* mat ctx->Viewport.WindowMap.M */
205 MOV_L ( REGOFF(28, ESP), EAX ) /* stride */
206 MOV_L ( REGOFF(32, ESP), ESI ) /* VB->ClipMask */
/* Build the x/y translation pair in MM6: low dword tx, high dword ty. */
208 MOVD ( REGOFF(MAT_TX, EBP), MM6 ) /* | tx */
209 PUNPCKLDQ ( REGOFF(MAT_TY, EBP), MM6 ) /* ty | tx */
/* Write the snapper constant twice into an 8-byte scratch slot so it can be
 * loaded as a packed pair. 0x49400000 is the IEEE-754 single-precision bit
 * pattern of 786432.0 (1.5 * 2^19); floats of that magnitude are spaced 1/16
 * apart, so adding it here and subtracting it after the transform presumably
 * snaps x and y to 1/16 steps.
 * NOTE(review): the slot lies below ESP and no stack-pointer adjustment is
 * visible in this view, so anything that uses this stack asynchronously
 * (for example a signal handler) could overwrite it before the MOVQ below
 * reads it -- confirm against the full file. */
212 MOV_L ( CONST(0x49400000), REGOFF(-8, ESP) ) /* snapper */
213 MOV_L ( CONST(0x49400000), REGOFF(-4, ESP) ) /* snapper */
216 MOVQ ( REGOFF(-8, ESP), MM4 ) /* snapper | snapper */
/* Fold the snapper into the translation once, outside the loop. */
217 PFADD ( MM4, MM6 ) /* ty+snapper | tx+snapper */
/* Build the x/y scale pair in MM5; the x scale is read from offset 0 of the
 * matrix via REGIND. */
219 MOVD ( REGIND(EBP), MM5 )
220 PUNPCKLDQ ( REGOFF(MAT_SY, EBP), MM5 ) /* vsy | vsx */
222 MOVD ( REGOFF(MAT_SZ, EBP), MM1 ) /* | vsz */
/* Per-vertex loop; ECX points at the record, ESI at its clip-mask byte. */
227 TAGLLBL(FXPCV_loop_start):
229 PREFETCH ( REGOFF(64, ECX) ) /* fetch the next-ish vertex */
/* A non-zero clip-mask byte sends this vertex to the skip label, bypassing
 * the transform below. */
231 CMP_B ( CONST(0), REGIND(ESI) )
232 JNE ( TAGLLBL(FXPCV_skip) )
/* oow = 1 / f[3]: PFRCP produces the initial approximation in MM0, then
 * PFRCPIT1 / PFRCPIT2 refine it into MM7, which starts out holding f[3]. */
234 MOVD ( REGOFF(12, ECX), MM0) /* | f[3] */
235 PFRCP ( MM0, MM0 ) /* oow = 1/f[3] */
237 MOVD ( REGOFF(12, ECX), MM7) /* | f[3] */
238 PFRCPIT1 ( MM0, MM7 )
239 PFRCPIT2 ( MM0, MM7 ) /* oow | oow */
/* Duplicate the low dword of MM7 into its high dword so both halves hold
 * oow for the packed multiplies below. */
241 PUNPCKLDQ ( MM7, MM7 )
/* Colour: one dword is copied from CLIP_R to the R slot, without scaling. */
244 #if (TYPE & SETUP_RGBA)
245 MOVD ( REGOFF(CLIP_R, ECX ), MM0 ) /* f[RCOORD] = f[CLIP_R]; */
246 MOVD ( MM0, REGOFF(GR_VERTEX_R_OFFSET, ECX) )
/* Texture unit 1: load the (s, t) pair, multiply both by oow, store it as one
 * qword starting at the SOW slot. */
249 #if (TYPE & SETUP_TMU1)
250 MOVQ ( REGOFF(CLIP_S1, ECX), MM0 ) /* f[S1COORD] = f[CLIP_S1] * oow */
251 PFMUL ( MM7, MM0 ) /* f[T1COORD] = f[CLIP_T1] * oow */
252 MOVQ ( MM0, REGOFF(GR_VERTEX_SOW_TMU1_OFFSET, ECX) )
/* Texture unit 0: same treatment as unit 1. */
256 #if (TYPE & SETUP_TMU0)
257 MOVQ ( REGOFF(CLIP_S0, ECX), MM0 ) /* f[S0COORD] = f[CLIP_S0] * oow */
258 PFMUL ( MM7, MM0 ) /* f[T0COORD] = f[CLIP_T0] * oow */
259 MOVQ ( MM0, REGOFF(GR_VERTEX_SOW_TMU0_OFFSET, ECX) )
/* Position: (x, y) are handled as a packed pair in MM2, z alone in MM3.
 * Each is multiplied by oow, then by the matrix scale, then the matrix
 * translation is added. */
267 MOVQ ( REGIND(ECX), MM2 ) /* f[1] | f[0] */
268 PFMUL ( MM7, MM2 ) /* f[1] * oow | f[0] * oow */
270 MOVD ( REGOFF(8, ECX), MM3 ) /* | f[2] */
271 PFMUL ( MM7, MM3 ) /* | f[2] * oow */
273 MOVD ( REGOFF(MAT_TZ, EBP), MM0 ) /* | vtz */
274 PFMUL ( MM1, MM3 ) /* | f[2] *= vsz */
276 PFADD ( MM0, MM3 ) /* | f[2] += vtz */
277 PFMUL ( MM5, MM2 ) /* f[1] *= vsy | f[0] *= vsx */
/* MM6 already contains the snapper, so this add applies translation and
 * snapper together; the PFSUB that follows takes the snapper back out. */
279 PFADD ( MM6, MM2 ) /* f[1] += vty | f[0] += vtx */
282 PFSUB ( MM4, MM2 ) /* f[0,1] -= snapper */
/* Store x and y as one qword at the X slot, and z as a dword at the OOZ
 * slot. */
285 MOVQ ( MM2, REGOFF(GR_VERTEX_X_OFFSET, ECX) )
286 MOVD ( MM3, REGOFF(GR_VERTEX_OOZ_OFFSET, ECX) )
289 /* end of DO_SETUP_XYZ */
291 MOVD ( MM7, REGOFF(GR_VERTEX_OOW_OFFSET, ECX) ) /* f[OOWCOORD] = oow */
/* Advance to the next vertex record and to the next clip-mask byte. */
294 ADD_L ( EAX, ECX ) /* f += stride */
296 INC_L ( ESI ) /* next ClipMask */
/* NOTE(review): no compare is visible between the INC_L above and this JA,
 * and the FXPCV_skip label targeted earlier is not visible either; the
 * other routine compares ECX with EDX at this point. Confirm the loop
 * condition against the full file. */
298 JA ( TAGLLBL(FXPCV_loop_start) )