/**************************************************************************
 *
 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
29 #include "util/u_memory.h"
30 #include "pipe/p_shader_tokens.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_util.h"
33 #include "tgsi/tgsi_exec.h"
35 #include "draw_vs_aos.h"
36 #include "draw_vertex.h"
38 #include "rtasm/rtasm_x86sse.h"
/* Note - don't yet have to worry about interacting with the code in
 * draw_vs_aos.c as there is no intermingling of generated code...
 * That may have to change, we'll see.
 */
46 static void emit_load_R32G32B32A32( struct aos_compilation *cp,
48 struct x86_reg src_ptr )
50 sse_movups(cp->func, data, src_ptr);
53 static void emit_load_R32G32B32( struct aos_compilation *cp,
55 struct x86_reg src_ptr )
58 sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
60 sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
62 sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) );
64 sse_movlps(cp->func, data, src_ptr);
67 sse_movups(cp->func, data, src_ptr);
69 sse2_pshufd(cp->func, data, data, SHUF(W,X,Y,Z) );
71 sse_movss(cp->func, data, aos_get_internal_xmm( cp, IMM_ONES ) );
73 sse2_pshufd(cp->func, data, data, SHUF(Y,Z,W,X) );
78 static void emit_load_R32G32( struct aos_compilation *cp,
80 struct x86_reg src_ptr )
82 sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
83 sse_movlps(cp->func, data, src_ptr);
87 static void emit_load_R32( struct aos_compilation *cp,
89 struct x86_reg src_ptr )
91 sse_movss(cp->func, data, src_ptr);
92 sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
96 static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
98 struct x86_reg src_ptr )
100 sse_movss(cp->func, data, src_ptr);
101 sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
102 sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
103 sse2_cvtdq2ps(cp->func, data, data);
104 sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255));
109 /* Extended swizzles? Maybe later.
111 static void emit_swizzle( struct aos_compilation *cp,
116 sse_shufps(cp->func, dest, src, shuffle);
121 static boolean get_buffer_ptr( struct aos_compilation *cp,
127 struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
128 buf_idx * sizeof(struct aos_buffer));
130 struct x86_reg buf_stride = x86_make_disp(buf,
131 Offset(struct aos_buffer, stride));
133 struct x86_reg buf_ptr = x86_make_disp(buf,
134 Offset(struct aos_buffer, ptr));
137 /* Calculate pointer to current attrib:
139 x86_mov(cp->func, ptr, buf_ptr);
140 x86_mov(cp->func, elt, buf_stride);
141 x86_add(cp->func, elt, ptr);
142 if (buf_idx == 0) sse_prefetchnta(cp->func, x86_make_disp(elt, 192));
143 x86_mov(cp->func, buf_ptr, elt);
146 struct x86_reg buf_base_ptr = x86_make_disp(buf,
147 Offset(struct aos_buffer, base_ptr));
150 /* Calculate pointer to current attrib:
152 x86_mov(cp->func, ptr, buf_stride);
153 x86_imul(cp->func, ptr, elt);
154 x86_add(cp->func, ptr, buf_base_ptr);
163 static boolean load_input( struct aos_compilation *cp,
165 struct x86_reg bufptr )
167 unsigned format = cp->vaos->base.key.element[idx].in.format;
168 unsigned offset = cp->vaos->base.key.element[idx].in.offset;
169 struct x86_reg dataXMM = aos_get_xmm_reg(cp);
171 /* Figure out source pointer address:
173 struct x86_reg src = x86_make_disp(bufptr, offset);
175 aos_adopt_xmm_reg( cp,
182 case PIPE_FORMAT_R32_FLOAT:
183 emit_load_R32(cp, dataXMM, src);
185 case PIPE_FORMAT_R32G32_FLOAT:
186 emit_load_R32G32(cp, dataXMM, src);
188 case PIPE_FORMAT_R32G32B32_FLOAT:
189 emit_load_R32G32B32(cp, dataXMM, src);
191 case PIPE_FORMAT_R32G32B32A32_FLOAT:
192 emit_load_R32G32B32A32(cp, dataXMM, src);
194 case PIPE_FORMAT_A8R8G8B8_UNORM:
195 emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
196 emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
198 case PIPE_FORMAT_R8G8B8A8_UNORM:
199 emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
202 AOS_ERROR(cp, "unhandled input format");
209 static boolean load_inputs( struct aos_compilation *cp,
215 for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) {
216 if (cp->vaos->base.key.element[i].in.buffer == buffer) {
218 if (!load_input( cp, i, ptr ))
228 boolean aos_init_inputs( struct aos_compilation *cp, boolean linear )
231 for (i = 0; i < cp->vaos->nr_vb; i++) {
232 struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
233 i * sizeof(struct aos_buffer));
235 struct x86_reg buf_base_ptr = x86_make_disp(buf,
236 Offset(struct aos_buffer, base_ptr));
238 if (cp->vaos->base.key.const_vbuffers & (1<<i)) {
239 struct x86_reg ptr = cp->tmp_EAX;
241 x86_mov(cp->func, ptr, buf_base_ptr);
243 /* Load all inputs for this constant vertex buffer
245 load_inputs( cp, i, x86_deref(ptr) );
247 /* Then just force them out to aos_machine.input[]
254 struct x86_reg elt = cp->idx_EBX;
255 struct x86_reg ptr = cp->tmp_EAX;
257 struct x86_reg buf_stride = x86_make_disp(buf,
258 Offset(struct aos_buffer, stride));
260 struct x86_reg buf_ptr = x86_make_disp(buf,
261 Offset(struct aos_buffer, ptr));
264 /* Calculate pointer to current attrib:
266 x86_mov(cp->func, ptr, buf_stride);
267 x86_imul(cp->func, ptr, elt);
268 x86_add(cp->func, ptr, buf_base_ptr);
271 /* In the linear case, keep the buffer pointer instead of the
274 if (cp->vaos->nr_vb == 1)
275 x86_mov( cp->func, elt, ptr );
277 x86_mov( cp->func, buf_ptr, ptr );
286 boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
290 for (j = 0; j < cp->vaos->nr_vb; j++) {
291 if (cp->vaos->base.key.const_vbuffers & (1<<j)) {
292 /* just retreive pre-transformed input */
294 else if (linear && cp->vaos->nr_vb == 1) {
295 load_inputs( cp, 0, cp->idx_EBX );
298 struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX);
299 struct x86_reg ptr = cp->tmp_EAX;
301 if (!get_buffer_ptr( cp, linear, j, elt, ptr ))
304 if (!load_inputs( cp, j, ptr ))
312 boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear )
314 if (linear && cp->vaos->nr_vb == 1) {
315 struct x86_reg stride = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
316 (0 * sizeof(struct aos_buffer) +
317 Offset(struct aos_buffer, stride)));
319 x86_add(cp->func, cp->idx_EBX, stride);
320 sse_prefetchnta(cp->func, x86_make_disp(cp->idx_EBX, 192));
326 x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4));
337 static void emit_store_R32G32B32A32( struct aos_compilation *cp,
338 struct x86_reg dst_ptr,
339 struct x86_reg dataXMM )
341 sse_movups(cp->func, dst_ptr, dataXMM);
344 static void emit_store_R32G32B32( struct aos_compilation *cp,
345 struct x86_reg dst_ptr,
346 struct x86_reg dataXMM )
348 sse_movlps(cp->func, dst_ptr, dataXMM);
349 sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
350 sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM);
353 static void emit_store_R32G32( struct aos_compilation *cp,
354 struct x86_reg dst_ptr,
355 struct x86_reg dataXMM )
357 sse_movlps(cp->func, dst_ptr, dataXMM);
360 static void emit_store_R32( struct aos_compilation *cp,
361 struct x86_reg dst_ptr,
362 struct x86_reg dataXMM )
364 sse_movss(cp->func, dst_ptr, dataXMM);
369 static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp,
370 struct x86_reg dst_ptr,
371 struct x86_reg dataXMM )
373 sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255));
374 sse2_cvtps2dq(cp->func, dataXMM, dataXMM);
375 sse2_packssdw(cp->func, dataXMM, dataXMM);
376 sse2_packuswb(cp->func, dataXMM, dataXMM);
377 sse_movss(cp->func, dst_ptr, dataXMM);
384 static boolean emit_output( struct aos_compilation *cp,
386 struct x86_reg dataXMM,
387 enum attrib_emit format )
392 emit_store_R32(cp, ptr, dataXMM);
395 emit_store_R32G32(cp, ptr, dataXMM);
398 emit_store_R32G32B32(cp, ptr, dataXMM);
401 emit_store_R32G32B32A32(cp, ptr, dataXMM);
404 emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
407 emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
408 emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
411 AOS_ERROR(cp, "unhandled output format");
420 boolean aos_emit_outputs( struct aos_compilation *cp )
424 for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) {
425 enum attrib_emit format = cp->vaos->base.key.element[i].out.format;
426 unsigned offset = cp->vaos->base.key.element[i].out.offset;
427 unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output;
431 if (format == EMIT_1F_PSIZE) {
432 data = aos_get_internal_xmm( cp, IMM_PSIZE );
435 data = aos_get_shader_reg( cp,
440 if (data.file != file_XMM) {
441 struct x86_reg tmp = aos_get_xmm_reg( cp );
442 sse_movaps(cp->func, tmp, data);
446 if (!emit_output( cp,
447 x86_make_disp( cp->outbuf_ECX, offset ),
452 aos_release_xmm_reg( cp, data.idx );