From de9f8e8b717aa4b4ab94af73be5aa70088cd6b81 Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 2 Jan 2008 18:53:33 -0700 Subject: [PATCH] Cell: basic triangle rendering works. The cell "render_stage" (last in the "draw" pipeline) emits vertices into a buffer which is pulled by the SPUs in response to a "RENDER" command. This is pretty much temporary/scaffold code for now. --- src/mesa/pipe/cell/common.h | 21 ++++++++ src/mesa/pipe/cell/ppu/cell_context.c | 5 +- src/mesa/pipe/cell/ppu/cell_context.h | 3 +- src/mesa/pipe/cell/ppu/cell_flush.c | 3 ++ src/mesa/pipe/cell/ppu/cell_render.c | 73 +++++++++++++++++++++++++++ src/mesa/pipe/cell/ppu/cell_render.h | 3 ++ src/mesa/pipe/cell/ppu/cell_surface.c | 8 ++- src/mesa/pipe/cell/spu/main.c | 95 ++++++++++++++++++++++++++++++++--- src/mesa/pipe/cell/spu/main.h | 13 +++-- src/mesa/pipe/cell/spu/tri.c | 21 ++++++-- src/mesa/pipe/cell/spu/tri.h | 4 ++ src/mesa/pipe/xlib/xm_winsys.c | 9 +++- 12 files changed, 240 insertions(+), 18 deletions(-) diff --git a/src/mesa/pipe/cell/common.h b/src/mesa/pipe/cell/common.h index f7f1e2e..0868e8d 100644 --- a/src/mesa/pipe/cell/common.h +++ b/src/mesa/pipe/cell/common.h @@ -51,6 +51,7 @@ #define CELL_CMD_CLEAR_TILES 3 #define CELL_CMD_TRIANGLE 4 #define CELL_CMD_FINISH 5 +#define CELL_CMD_RENDER 6 /** @@ -80,12 +81,22 @@ struct cell_command_triangle } ALIGN16_ATTRIB; +struct cell_command_render +{ + uint prim_type; + uint num_verts; + float xmin, ymin, xmax, ymax; + void *vertex_data; +} ALIGN16_ATTRIB; + + /** XXX unions don't seem to work */ struct cell_command { struct cell_command_framebuffer fb; struct cell_command_clear_tiles clear; struct cell_command_triangle tri; + struct cell_command_render render; } ALIGN16_ATTRIB; @@ -98,6 +109,16 @@ struct cell_init_info } ALIGN16_ATTRIB; +/** Temporary */ +#define CELL_MAX_VERTS 48 +#define CELL_MAX_ATTRIBS 2 +struct cell_prim_buffer +{ + float vertex[CELL_MAX_VERTS][CELL_MAX_ATTRIBS][4] ALIGN16_ATTRIB; + float xmin, ymin, xmax, ymax; + 
uint num_verts; +} ALIGN16_ATTRIB; + #endif /* CELL_COMMON_H */ diff --git a/src/mesa/pipe/cell/ppu/cell_context.c b/src/mesa/pipe/cell/ppu/cell_context.c index 82b69ac..fb89837 100644 --- a/src/mesa/pipe/cell/ppu/cell_context.c +++ b/src/mesa/pipe/cell/ppu/cell_context.c @@ -161,10 +161,13 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) { struct cell_context *cell; - cell = CALLOC_STRUCT(cell_context); + /* some fields need to be 16-byte aligned, so align the whole object */ + cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); if (!cell) return NULL; + memset(cell, 0, sizeof(*cell)); + cell->winsys = cws; cell->pipe.winsys = winsys; cell->pipe.destroy = cell_destroy_context; diff --git a/src/mesa/pipe/cell/ppu/cell_context.h b/src/mesa/pipe/cell/ppu/cell_context.h index 5f6f987..be98582 100644 --- a/src/mesa/pipe/cell/ppu/cell_context.h +++ b/src/mesa/pipe/cell/ppu/cell_context.h @@ -34,6 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/draw/draw_vertex.h" #include "cell_winsys.h" +#include "pipe/cell/common.h" struct cell_vertex_shader_state @@ -90,7 +91,7 @@ struct cell_context uint num_spus; - + struct cell_prim_buffer prim_buffer; }; diff --git a/src/mesa/pipe/cell/ppu/cell_flush.c b/src/mesa/pipe/cell/ppu/cell_flush.c index b1ff0e5..47003be 100644 --- a/src/mesa/pipe/cell/ppu/cell_flush.c +++ b/src/mesa/pipe/cell/ppu/cell_flush.c @@ -29,6 +29,7 @@ #include "cell_context.h" #include "cell_flush.h" #include "cell_spu.h" +#include "cell_render.h" void @@ -39,6 +40,8 @@ cell_flush(struct pipe_context *pipe, unsigned flags) printf("%s\n", __FUNCTION__); + cell_flush_prim_buffer(cell); + /* Send CMD_FINISH to all SPUs */ for (i = 0; i < cell->num_spus; i++) { send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH); diff --git a/src/mesa/pipe/cell/ppu/cell_render.c b/src/mesa/pipe/cell/ppu/cell_render.c index c7f0cf6..672406a 100644 --- a/src/mesa/pipe/cell/ppu/cell_render.c +++ 
b/src/mesa/pipe/cell/ppu/cell_render.c @@ -32,6 +32,7 @@ #include "cell_context.h" #include "cell_render.h" +#include "cell_spu.h" #include "pipe/p_util.h" #include "pipe/draw/draw_private.h" @@ -89,13 +90,85 @@ render_line(struct draw_stage *stage, struct prim_header *prim) } +/** Write a vertex into the prim buffer */ +static void +save_vertex(struct cell_prim_buffer *buf, uint pos, + const struct vertex_header *vert) +{ + uint attr, j; + + for (attr = 0; attr < 2; attr++) { + for (j = 0; j < 4; j++) { + buf->vertex[pos][attr][j] = vert->data[attr][j]; + } + } + + /* update bounding box */ + if (vert->data[0][0] < buf->xmin) + buf->xmin = vert->data[0][0]; + if (vert->data[0][0] > buf->xmax) + buf->xmax = vert->data[0][0]; + if (vert->data[0][1] < buf->ymin) + buf->ymin = vert->data[0][1]; + if (vert->data[0][1] > buf->ymax) + buf->ymax = vert->data[0][1]; +} + + static void render_tri(struct draw_stage *stage, struct prim_header *prim) { + struct render_stage *rs = render_stage(stage); + struct cell_context *cell = rs->cell; + struct cell_prim_buffer *buf = &cell->prim_buffer; + uint i; + printf("Cell render tri\n"); + + if (buf->num_verts + 3 > CELL_MAX_VERTS) { + cell_flush_prim_buffer(cell); + } + + i = buf->num_verts; + assert(i+2 <= CELL_MAX_VERTS); + save_vertex(buf, i+0, prim->v[0]); + save_vertex(buf, i+1, prim->v[1]); + save_vertex(buf, i+2, prim->v[2]); + buf->num_verts += 3; } +/** + * Send a RENDER command to all SPUs to have them render the prims + * in the current prim_buffer. 
+ */ +void +cell_flush_prim_buffer(struct cell_context *cell) +{ + uint i; + + if (cell->prim_buffer.num_verts == 0) + return; + + printf("*** Flushing prim buffer\n"); + for (i = 0; i < cell->num_spus; i++) { + struct cell_command_render *render = &cell_global.command[i].render; + render->prim_type = PIPE_PRIM_TRIANGLES; + render->num_verts = cell->prim_buffer.num_verts; + render->xmin = cell->prim_buffer.xmin; + render->ymin = cell->prim_buffer.ymin; + render->xmax = cell->prim_buffer.xmax; + render->ymax = cell->prim_buffer.ymax; + render->vertex_data = &cell->prim_buffer.vertex; + ASSERT_ALIGN16(render->vertex_data); + send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_RENDER); + } + + cell->prim_buffer.num_verts = 0; +} + + + static void render_destroy( struct draw_stage *stage ) { FREE( stage ); diff --git a/src/mesa/pipe/cell/ppu/cell_render.h b/src/mesa/pipe/cell/ppu/cell_render.h index d66e1bd..826dcba 100644 --- a/src/mesa/pipe/cell/ppu/cell_render.h +++ b/src/mesa/pipe/cell/ppu/cell_render.h @@ -31,6 +31,9 @@ struct cell_context; struct draw_stage; +extern void +cell_flush_prim_buffer(struct cell_context *cell); + extern struct draw_stage *cell_draw_render_stage( struct cell_context *cell ); #endif /* CELL_RENDER_H */ diff --git a/src/mesa/pipe/cell/ppu/cell_surface.c b/src/mesa/pipe/cell/ppu/cell_surface.c index 185eeb2..c487d04 100644 --- a/src/mesa/pipe/cell/ppu/cell_surface.c +++ b/src/mesa/pipe/cell/ppu/cell_surface.c @@ -51,8 +51,14 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, if (!ps->map) pipe_surface_map(ps); + if (pf_get_size(ps->format) != 4) { + printf("Cell: Skipping non 32bpp clear_surface\n"); + return; + } + for (i = 0; i < cell->num_spus; i++) { struct cell_command_framebuffer *fb = &cell_global.command[i].fb; + printf("%s %u start = 0x%x\n", __FUNCTION__, i, ps->map); fb->start = ps->map; fb->width = ps->width; fb->height = ps->height; @@ -66,7 +72,7 @@ cell_clear_surface(struct pipe_context *pipe, 
struct pipe_surface *ps, send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_CLEAR_TILES); } -#if 1 +#if 0 /* XXX Draw a test triangle over the cleared surface */ for (i = 0; i < cell->num_spus; i++) { /* Same triangle data for all SPUs */ diff --git a/src/mesa/pipe/cell/spu/main.c b/src/mesa/pipe/cell/spu/main.c index cc5eddb..183397a 100644 --- a/src/mesa/pipe/cell/spu/main.c +++ b/src/mesa/pipe/cell/spu/main.c @@ -37,6 +37,7 @@ #include "main.h" #include "tri.h" #include "pipe/cell/common.h" +#include "pipe/p_defines.h" /* helpful headers: @@ -48,6 +49,8 @@ volatile struct cell_init_info init; struct framebuffer fb; +uint tile[TILE_SIZE][TILE_SIZE] ALIGN16_ATTRIB; + int DefaultTag; @@ -62,12 +65,12 @@ wait_on_mask(unsigned tag) void -get_tile(const struct framebuffer *fb, uint tx, uint ty, uint *tile) +get_tile(const struct framebuffer *fb, uint tx, uint ty, uint *tile, + int tag) { uint offset = ty * fb->width_tiles + tx; uint bytesPerTile = TILE_SIZE * TILE_SIZE * 4; ubyte *src = (ubyte *) fb->start + offset * bytesPerTile; - int tag = DefaultTag; assert(tx < fb->width_tiles); assert(ty < fb->height_tiles); @@ -85,12 +88,12 @@ get_tile(const struct framebuffer *fb, uint tx, uint ty, uint *tile) } void -put_tile(const struct framebuffer *fb, uint tx, uint ty, const uint *tile) +put_tile(const struct framebuffer *fb, uint tx, uint ty, const uint *tile, + int tag) { uint offset = ty * fb->width_tiles + tx; uint bytesPerTile = TILE_SIZE * TILE_SIZE * 4; ubyte *dst = (ubyte *) fb->start + offset * bytesPerTile; - int tag = DefaultTag; assert(tx < fb->width_tiles); assert(ty < fb->height_tiles); @@ -100,7 +103,7 @@ put_tile(const struct framebuffer *fb, uint tx, uint ty, const uint *tile) tile, (unsigned int) dst, bytesPerTile); */ mfc_put((void *) tile, /* src in local memory */ - (unsigned int) dst, /* dst in main mory */ + (unsigned int) dst, /* dst in main memory */ bytesPerTile, tag, 0, /* tid */ @@ -119,15 +122,19 @@ clear_tiles(const struct 
cell_command_clear_tiles *clear) for (i = 0; i < TILE_SIZE * TILE_SIZE; i++) tile[i] = clear->value; + /* printf("SPU: %s num=%d w=%d h=%d\n", __FUNCTION__, num_tiles, fb.width_tiles, fb.height_tiles); + */ + for (i = init.id; i < num_tiles; i += init.num_spus) { uint tx = i % fb.width_tiles; uint ty = i / fb.width_tiles; - put_tile(&fb, tx, ty, tile); + put_tile(&fb, tx, ty, tile, DefaultTag); /* XXX we don't want this here, but it fixes bad tile results */ wait_on_mask(1 << DefaultTag); } + } @@ -155,6 +162,76 @@ triangle(const struct cell_command_triangle *tri) +static void +render(const struct cell_command_render *render) +{ + const uint num_tiles = fb.width_tiles * fb.height_tiles; + struct cell_prim_buffer prim_buffer ALIGN16_ATTRIB; + int tag = DefaultTag; + uint i, j; + + /* + printf("SPU %u: RENDER buffer dst=%p src=%p size=%d\n", + init.id, + &prim_buffer, render->vertex_data, (int)sizeof(prim_buffer)); + */ + + ASSERT_ALIGN16(render->vertex_data); + ASSERT_ALIGN16(&prim_buffer); + + /* get vertex data from main memory */ + mfc_get(&prim_buffer, /* dest */ + (unsigned int) render->vertex_data, /* src */ + sizeof(prim_buffer), /* bytes */ + tag, + 0, /* tid */ + 0 /* rid */); + wait_on_mask( 1 << tag ); /* XXX temporary */ + + /* loop over tiles */ + for (i = init.id; i < num_tiles; i += init.num_spus) { + uint tx = i % fb.width_tiles; + uint ty = i / fb.width_tiles; + + get_tile(&fb, tx, ty, (uint *) tile, DefaultTag); + wait_on_mask(1 << DefaultTag); /* XXX temporary */ + + assert(render->prim_type == PIPE_PRIM_TRIANGLES); + + /* loop over tris */ + for (j = 0; j < render->num_verts; j += 3) { + struct prim_header prim; + + /* + printf(" %u: Triangle %g,%g %g,%g %g,%g\n", + init.id, + prim_buffer.vertex[j+0][0][0], + prim_buffer.vertex[j+0][0][1], + prim_buffer.vertex[j+1][0][0], + prim_buffer.vertex[j+1][0][1], + prim_buffer.vertex[j+2][0][0], + prim_buffer.vertex[j+2][0][1]); + */ + + /* pos */ + COPY_4V(prim.v[0].data[0], 
prim_buffer.vertex[j+0][0]); + COPY_4V(prim.v[1].data[0], prim_buffer.vertex[j+1][0]); + COPY_4V(prim.v[2].data[0], prim_buffer.vertex[j+2][0]); + + /* color */ + COPY_4V(prim.v[0].data[1], prim_buffer.vertex[j+0][1]); + COPY_4V(prim.v[1].data[1], prim_buffer.vertex[j+1][1]); + COPY_4V(prim.v[2].data[1], prim_buffer.vertex[j+2][1]); + + draw_triangle(&prim, tx, ty); + } + + put_tile(&fb, tx, ty, (uint *) tile, DefaultTag); + wait_on_mask(1 << DefaultTag); /* XXX temp */ + } +} + + /** * Temporary/simple main loop for SPEs: Get a command, execute it, repeat. */ @@ -215,6 +292,12 @@ main_loop(void) printf("SPU %u: TRIANGLE\n", init.id); triangle(&cmd.tri); break; + case CELL_CMD_RENDER: + printf("SPU %u: RENDER %u verts, prim %u\n", + init.id, cmd.render.num_verts, cmd.render.prim_type); + render(&cmd.render); + break; + case CELL_CMD_FINISH: printf("SPU %u: FINISH\n", init.id); /* wait for all outstanding DMAs to finish */ diff --git a/src/mesa/pipe/cell/spu/main.h b/src/mesa/pipe/cell/spu/main.h index 8c27963..47ce4be 100644 --- a/src/mesa/pipe/cell/spu/main.h +++ b/src/mesa/pipe/cell/spu/main.h @@ -37,13 +37,16 @@ extern volatile struct cell_init_info init; struct framebuffer { - void *start; - uint width, height; + void *start; /**< addr of surface in main memory */ + uint width, height; /**< size in pixels */ uint width_tiles, height_tiles; /**< width and height in tiles */ }; +/* XXX Collect these globals in a struct: */ + extern struct framebuffer fb; +extern uint tile[TILE_SIZE][TILE_SIZE] ALIGN16_ATTRIB; extern int DefaultTag; @@ -52,10 +55,12 @@ void wait_on_mask(unsigned tag); void -get_tile(const struct framebuffer *fb, uint tx, uint ty, uint *tile); +get_tile(const struct framebuffer *fb, uint tx, uint ty, uint *tile, + int tag); void -put_tile(const struct framebuffer *fb, uint tx, uint ty, const uint *tile); +put_tile(const struct framebuffer *fb, uint tx, uint ty, const uint *tile, + int tag); #endif /* MAIN_H */ diff --git 
a/src/mesa/pipe/cell/spu/tri.c b/src/mesa/pipe/cell/spu/tri.c index cd648db..ce759a5 100644 --- a/src/mesa/pipe/cell/spu/tri.c +++ b/src/mesa/pipe/cell/spu/tri.c @@ -74,8 +74,6 @@ static int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy; -static uint tile[TILE_SIZE][TILE_SIZE] ALIGN16_ATTRIB; - #endif @@ -879,11 +877,26 @@ draw_triangle(struct prim_header *tri, uint tx, uint ty) cliprect_maxx = (tx + 1) * TILE_SIZE; cliprect_maxy = (ty + 1) * TILE_SIZE; - get_tile(&fb, tx, ty, (uint *) tile); + get_tile(&fb, tx, ty, (uint *) tile, DefaultTag); wait_on_mask(1 << DefaultTag); setup_tri(tri); - put_tile(&fb, tx, ty, (uint *) tile); + put_tile(&fb, tx, ty, (uint *) tile, DefaultTag); wait_on_mask(1 << DefaultTag); } + + +void +tri_draw(struct prim_header *tri, uint tx, uint ty) +{ + /* set clipping bounds to tile bounds */ + cliprect_minx = tx * TILE_SIZE; + cliprect_miny = ty * TILE_SIZE; + cliprect_maxx = (tx + 1) * TILE_SIZE; + cliprect_maxy = (ty + 1) * TILE_SIZE; + + setup_tri(tri); +} + + diff --git a/src/mesa/pipe/cell/spu/tri.h b/src/mesa/pipe/cell/spu/tri.h index dc66ad0..5760305 100644 --- a/src/mesa/pipe/cell/spu/tri.h +++ b/src/mesa/pipe/cell/spu/tri.h @@ -49,4 +49,8 @@ extern void draw_triangle(struct prim_header *tri, uint tx, uint ty); +extern void +tri_draw(struct prim_header *tri, uint tx, uint ty); + + #endif /* TRI_H */ diff --git a/src/mesa/pipe/xlib/xm_winsys.c b/src/mesa/pipe/xlib/xm_winsys.c index 42c4338..10dc09b 100644 --- a/src/mesa/pipe/xlib/xm_winsys.c +++ b/src/mesa/pipe/xlib/xm_winsys.c @@ -45,7 +45,10 @@ #ifdef GALLIUM_CELL #include "pipe/cell/ppu/cell_context.h" #include "pipe/cell/ppu/cell_winsys.h" +#else +#define TILE_SIZE 32 /* avoid compilation errors */ #endif + #include "xm_winsys_aub.h" @@ -214,7 +217,6 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf) { XImage *ximage = b->tempImage; struct xm_buffer *xm_buf = xm_bo(surf->buffer); - const int TILE_SIZE = 32; const uint tilesPerRow = 
(surf->width + TILE_SIZE - 1) / TILE_SIZE; uint x, y; @@ -234,6 +236,7 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf) int tx = x / TILE_SIZE; int ty = y / TILE_SIZE; int offset = ty * tilesPerRow + tx; + offset *= 4 * TILE_SIZE * TILE_SIZE; ximage->data = (char *) xm_buf->data + offset; @@ -364,6 +367,10 @@ xm_surface_alloc_storage(struct pipe_winsys *winsys, surf->cpp = pf_get_size(format); surf->pitch = round_up(width, alignment / surf->cpp); +#ifdef GALLIUM_CELL /* XXX a bit of a hack */ + height = round_up(height, TILE_SIZE); +#endif + assert(!surf->buffer); surf->buffer = winsys->buffer_create(winsys, alignment, 0, 0); if(!surf->buffer) -- 2.7.4