-/* Within a tile:
- */
-
/**
* Run the shader on all blocks in a tile. This is used when a tile is
* completely contained inside a triangle.
unsigned thread_index,
const union lp_rast_cmd_arg arg )
{
- /* Set c1,c2,c3 to large values so the in/out test always passes */
- const int32_t c1 = INT_MIN, c2 = INT_MIN, c3 = INT_MIN;
+ const struct lp_rast_state *state = rast->tasks[thread_index].current_state;
+ struct lp_rast_tile *tile = &rast->tasks[thread_index].tile;
const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
const unsigned tile_x = rast->tasks[thread_index].x;
const unsigned tile_y = rast->tasks[thread_index].y;
LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
- /* Use the existing preference for 4x4 (four quads) shading:
- */
- for (y = 0; y < TILE_SIZE; y += 4)
- for (x = 0; x < TILE_SIZE; x += 4)
- lp_rast_shade_quads( rast,
- thread_index,
- inputs,
- tile_x + x,
- tile_y + y,
- c1, c2, c3);
+ /* render the whole 64x64 tile in 4x4 chunks */
+ for (y = 0; y < TILE_SIZE; y += 4){
+ for (x = 0; x < TILE_SIZE; x += 4) {
+ uint8_t *color[PIPE_MAX_COLOR_BUFS];
+ uint32_t *depth;
+ unsigned block_offset, i;
+
+ /* offset of this 4x4 pixel block within the tile (16 per block, 16 * 16 per row of blocks) */
+ block_offset = ((y / 4) * (16 * 16) + (x / 4) * 16);
+
+ /* color buffer */
+ for (i = 0; i < rast->state.fb.nr_cbufs; i++)
+ color[i] = tile->color[i] + 4 * block_offset;
+
+ /* depth buffer */
+ depth = tile->depth + block_offset;
+
+ /* run shader */
+ state->jit_function[0]( &state->jit_context,
+ tile_x + x, tile_y + y,
+ inputs->a0,
+ inputs->dadx,
+ inputs->dady,
+ color,
+ depth,
+ INT_MIN, INT_MIN, INT_MIN,
+ NULL, NULL, NULL );
+ }
+ }
}
iy = y % TILE_SIZE;
/* offset of the 16x16 pixel block within the tile */
- block_offset = ((iy/4)*(16*16) + (ix/4)*16);
+ block_offset = ((iy / 4) * (16 * 16) + (ix / 4) * 16);
/* color buffer */
for (i = 0; i < rast->state.fb.nr_cbufs; i++)
#endif
/* run shader */
- state->jit_function( &state->jit_context,
+ state->jit_function[1]( &state->jit_context,
x, y,
inputs->a0,
inputs->dadx,
}
-/* End of tile:
- */
/**
/* The shader itself. Probably we also need to pass a pointer to
* the tile color/z/stencil data somehow:
- */
- lp_jit_frag_func jit_function;
+ * jit_function[0] skips the triangle in/out test code
+ * jit_function[1] does triangle in/out testing
+ */
+ lp_jit_frag_func jit_function[2];
boolean opaque;
};
#include "pipe/p_thread.h"
#include "lp_rast.h"
+#include "lp_tile_soa.h"
#define MAX_THREADS 8 /* XXX probably temporary here */
unsigned x, unsigned y,
int32_t c1, int32_t c2, int32_t c3);
+
+/**
+ * Shade all pixels in a 4x4 block. The fragment code omits the
+ * triangle in/out tests.
+ *
+ * Looks up the calling thread's current state and tile, locates the
+ * block inside the tile's color and depth storage, and calls
+ * state->jit_function[0] on it.
+ *
+ * \param rast  rasterizer whose tasks[] array holds the per-thread state
+ * \param thread_index  index into rast->tasks[]
+ * \param inputs  supplies the a0/dadx/dady arguments forwarded to the shader
+ * \param x, y location of 4x4 block in window coords
+ */
+static INLINE void
+lp_rast_shade_quads_all( struct lp_rasterizer *rast,
+ unsigned thread_index,
+ const struct lp_rast_shader_inputs *inputs,
+ unsigned x, unsigned y )
+{
+ const struct lp_rast_state *state = rast->tasks[thread_index].current_state;
+ struct lp_rast_tile *tile = &rast->tasks[thread_index].tile;
+ const unsigned ix = x % TILE_SIZE, iy = y % TILE_SIZE;
+ uint8_t *color[PIPE_MAX_COLOR_BUFS];
+ void *depth;
+ unsigned block_offset, i;
+
+ /* offset of the containing 4x4 pixel block within the tile: each block
+  * to the right adds 16, each row of blocks adds 16 * 16
+  */
+ block_offset = (iy / 4) * (16 * 16) + (ix / 4) * 16;
+
+ /* color buffer: one pointer per bound cbuf; the offset is scaled by 4
+  * (presumably bytes per pixel -- confirm against the tile layout)
+  */
+ for (i = 0; i < rast->state.fb.nr_cbufs; i++)
+ color[i] = tile->color[i] + 4 * block_offset;
+
+ /* depth buffer: offset is applied unscaled, in units of tile->depth's
+  * element type
+  */
+ depth = tile->depth + block_offset;
+
+ /* run shader: INT_MIN is passed for the three integer arguments after
+  * depth and NULL for the three pointer arguments; presumably these are
+  * unused because this variant skips the in/out test -- confirm
+  */
+ state->jit_function[0]( &state->jit_context,
+ x, y,
+ inputs->a0,
+ inputs->dadx,
+ inputs->dady,
+ color,
+ depth,
+ INT_MIN, INT_MIN, INT_MIN,
+ NULL, NULL, NULL );
+}
+
+
#endif
const struct lp_rast_triangle *tri,
int x, int y )
{
- /* Set c1,c2,c3 to large values so the in/out test always passes */
- const int32_t c1 = INT_MIN, c2 = INT_MIN, c3 = INT_MIN;
- lp_rast_shade_quads(rast_task->rast,
- rast_task->thread_index,
- &tri->inputs,
- x, y,
- c1, c2, c3);
+ lp_rast_shade_quads_all(rast_task->rast,
+ rast_task->thread_index,
+ &tri->inputs,
+ x, y);
}
}
void
-lp_setup_set_fs_function( struct setup_context *setup,
- lp_jit_frag_func jit_function,
- boolean opaque )
+lp_setup_set_fs_functions( struct setup_context *setup,
+ lp_jit_frag_func jit_function0,
+ lp_jit_frag_func jit_function1,
+ boolean opaque )
{
- LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) jit_function);
+ LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) jit_function0); /* NOTE(review): only jit_function0 is logged; jit_function1 is not */
/* FIXME: reference count */
- setup->fs.current.jit_function = jit_function;
+ setup->fs.current.jit_function[0] = jit_function0; /* slot 0 receives the first function argument */
+ setup->fs.current.jit_function[1] = jit_function1; /* slot 1 receives the second function argument */
setup->fs.current.opaque = opaque;
setup->dirty |= LP_SETUP_NEW_FS;
}
unsigned nr );
void
-lp_setup_set_fs_function( struct setup_context *setup,
- lp_jit_frag_func jit_function,
- boolean opaque );
+lp_setup_set_fs_functions( struct setup_context *setup,
+ lp_jit_frag_func jit_function0,
+ lp_jit_frag_func jit_function1,
+ boolean opaque );
void
lp_setup_set_fs_constants(struct setup_context *setup,
struct lp_fragment_shader_variant_key key;
- LLVMValueRef function;
+ LLVMValueRef function[2];
- lp_jit_frag_func jit_function;
+ lp_jit_frag_func jit_function[2];
struct lp_fragment_shader_variant *next;
};
}
+static LLVMValueRef
+build_int32_vec_const(int value)
+{
+ struct lp_type i32_type;
+ /* Describe a vector of four signed, non-normalized 32-bit integers and
+  * return whatever lp_build_int_const_scalar() builds for that type from
+  * 'value' (presumably the value replicated into every element -- confirm
+  * against lp_build_int_const_scalar).  The descriptor is zeroed first so
+  * that any field not assigned below is 0.
+  */
+ memset(&i32_type, 0, sizeof i32_type);
+ i32_type.floating = FALSE; /* values are integers */
+ i32_type.sign = TRUE; /* values are signed */
+ i32_type.norm = FALSE; /* values are not normalized */
+ i32_type.width = 32; /* 32-bit int values */
+ i32_type.length = 4; /* 4 elements per vector */
+ return lp_build_int_const_scalar(i32_type, value);
+}
+
+
+
/**
* Generate the fragment shader, depth/stencil test, and alpha tests.
* \param i which quad in the tile, in range [0,3]
+ * \param do_tri_test if 1, do triangle edge in/out testing
*/
static void
generate_fs(struct llvmpipe_context *lp,
LLVMValueRef *pmask,
LLVMValueRef (*color)[4],
LLVMValueRef depth_ptr,
+ unsigned do_tri_test,
LLVMValueRef c0,
LLVMValueRef c1,
LLVMValueRef c2,
lp_build_flow_scope_declare(flow, &z);
/* do triangle edge testing */
- generate_tri_edge_mask(builder, i, pmask,
- c0, c1, c2, step0_ptr, step1_ptr, step2_ptr);
+ if (do_tri_test) {
+ generate_tri_edge_mask(builder, i, pmask,
+ c0, c1, c2, step0_ptr, step1_ptr, step2_ptr);
+ }
+ else {
+ *pmask = build_int32_vec_const(~0);
+ }
/* 'mask' will control execution based on quad's pixel alive/killed state */
lp_build_mask_begin(&mask, flow, type, *pmask);
static void
generate_fragment(struct llvmpipe_context *lp,
struct lp_fragment_shader *shader,
- struct lp_fragment_shader_variant *variant)
+ struct lp_fragment_shader_variant *variant,
+ unsigned do_tri_test)
{
struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
const struct lp_fragment_shader_variant_key *key = &variant->key;
function = LLVMAddFunction(screen->module, "shader", func_type);
LLVMSetFunctionCallConv(function, LLVMCCallConv);
- variant->function = function;
+ variant->function[do_tri_test] = function;
/* XXX: need to propagate noalias down into color param now we are
&fs_mask[i], /* output */
out_color,
depth_ptr_i,
+ do_tri_test,
c0, c1, c2,
step0_ptr, step1_ptr, step2_ptr);
/*
* Translate the LLVM IR into machine code.
*/
- variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, function);
+ variant->jit_function[do_tri_test] = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, function);
if (LP_DEBUG & DEBUG_ASM)
- lp_disassemble(variant->jit_function);
+ lp_disassemble(variant->jit_function[do_tri_test]);
}
variant->shader = shader;
memcpy(&variant->key, key, sizeof *key);
- generate_fragment(lp, shader, variant);
+ generate_fragment(lp, shader, variant, 0);
+ generate_fragment(lp, shader, variant, 1);
/* insert new variant into linked list */
variant->next = shader->variants;
variant = shader->variants;
while(variant) {
struct lp_fragment_shader_variant *next = variant->next;
+ unsigned i;
- if(variant->function) {
- if(variant->jit_function)
- LLVMFreeMachineCodeForFunction(screen->engine, variant->function);
- LLVMDeleteFunction(variant->function);
+ for (i = 0; i < Elements(variant->function); i++) {
+ if (variant->function[i]) {
+ if (variant->jit_function[i])
+ LLVMFreeMachineCodeForFunction(screen->engine,
+ variant->function[i]);
+ LLVMDeleteFunction(variant->function[i]);
+ }
}
FREE(variant);
!shader->info.uses_kill
? TRUE : FALSE;
- lp_setup_set_fs_function(lp->setup,
- shader->current->jit_function,
- opaque);
+ lp_setup_set_fs_functions(lp->setup,
+ shader->current->jit_function[0],
+ shader->current->jit_function[1],
+ opaque);
}