From 78a43c7e3b2ff5aed1809f93b4f87a418355789e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 1 Jun 2022 20:48:07 +1000 Subject: [PATCH] drm/nouveau/gr/gf100-: make global attrib_cb actually global This was thought to be per-channel initially - it's not. The backing pages for the VMM mappings are shared for all channels. - switches to more straight-forward patch interfaces - prepares for sub-context support - this is saving a *sizeable* amount of vram v2: - whitespace Signed-off-by: Ben Skeggs Reviewed-by: Lyude Paul --- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c | 86 +++++++--------------- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h | 42 +++++------ drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c | 2 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c | 21 +++--- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c | 2 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c | 22 +++--- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c | 2 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c | 2 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c | 2 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c | 2 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c | 2 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c | 7 +- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c | 36 +++++---- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c | 2 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c | 7 +- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c | 65 +++++++++------- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c | 58 ++++++++------- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c | 2 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c | 2 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c | 50 +++++++------ drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c | 2 + drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 72 +++++++----------- drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h | 22 +----- 23 files changed, 241 insertions(+), 269 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c index 076861e..332590f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -1003,45 +1003,6 @@ gf100_grctx_patch_wr32(struct gf100_gr_chan *chan, u32 addr, u32 data) nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data); } -int -gf100_grctx_mmio_data(struct gf100_grctx *info, u32 size, u32 align, bool priv) -{ - if (info->data) { - info->buffer[info->buffer_nr] = round_up(info->addr, align); - info->addr = info->buffer[info->buffer_nr] + size; - info->data->size = size; - info->data->align = align; - info->data->priv = priv; - info->data++; - return info->buffer_nr++; - } - return -1; -} - -void -gf100_grctx_mmio_item(struct gf100_grctx *info, u32 addr, u32 data, - int shift, int buffer) -{ - struct nvkm_device *device = info->gr->base.engine.subdev.device; - if (info->data) { - if (shift >= 0) { - info->mmio->addr = addr; - info->mmio->data = data; - info->mmio->shift = shift; - info->mmio->buffer = buffer; - if (buffer >= 0) - data |= info->buffer[buffer] >> shift; - info->mmio++; - } else - return; - } else { - if (buffer >= 0) - return; - } - - nvkm_wr32(device, addr, data); -} - void gf100_grctx_generate_r419cb8(struct gf100_gr *gr) { @@ -1068,32 +1029,42 @@ gf100_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr) } void -gf100_grctx_generate_attrib(struct gf100_grctx *info) +gf100_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 attrib = grctx->attrib_nr; - const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); int gpc, tpc; u32 bo = 0; - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_wr32(info, 0x405830, (attrib << 16)); + gf100_grctx_patch_wr32(chan, 0x405830, (attrib << 16)); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { const u32 o = TPC_UNIT(gpc, tpc, 0x0520); - mmio_skip(info, o, (attrib << 16) | ++bo); - mmio_wr32(info, o, (attrib << 16) | --bo); + + gf100_grctx_patch_wr32(chan, o, (attrib << 16) | bo); bo += grctx->attrib_nr_max; } } } void +gf100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size) +{ + gf100_grctx_patch_wr32(chan, 0x418810, 0x80000000 | addr >> 12); + gf100_grctx_patch_wr32(chan, 0x419848, 0x10000000 | addr >> 12); +} + +u32 +gf100_grctx_generate_attrib_cb_size(struct gf100_gr *gr) +{ + const struct gf100_grctx_func *grctx = gr->func->grctx; + + return 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max) * gr->tpc_total; +} + +void gf100_grctx_generate_unkn(struct gf100_gr *gr) { } @@ -1368,7 +1339,7 @@ gf100_grctx_generate_floorsweep(struct gf100_gr *gr) } void -gf100_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info) +gf100_grctx_generate_main(struct gf100_gr_chan *chan) { struct gf100_gr *gr = chan->gr; struct nvkm_device *device = gr->base.engine.subdev.device; @@ -1394,7 +1365,8 @@ gf100_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info) grctx->pagepool(chan, chan->pagepool->addr); grctx->bundle(chan, chan->bundle_cb->addr, grctx->bundle_size); - grctx->attrib(info); + grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr)); + grctx->attrib(chan); if (grctx->patch_ltc) grctx->patch_ltc(chan); if (grctx->unknown_size) @@ -1450,7 +1422,6 @@ gf100_grctx_generate(struct gf100_gr *gr, struct gf100_gr_chan *chan, struct nvk struct nvkm_device *device = subdev->device; struct nvkm_memory *data = NULL; struct nvkm_vma *ctx = NULL; - struct gf100_grctx info; int ret, i; u64 addr; @@ -1500,13 +1471,6 @@ gf100_grctx_generate(struct gf100_gr *gr, struct gf100_gr_chan *chan, struct nvk nvkm_wo32(inst, 0x0214, upper_32_bits(ctx->addr + CB_RESERVED)); nvkm_done(inst); - /* Setup default state for mmio list construction. */ - info.gr = gr; - info.data = gr->mmio_data; - info.mmio = gr->mmio_list; - info.addr = ctx->addr; - info.buffer_nr = 0; - /* Make channel current. */ addr = inst->addr >> 12; if (gr->firmware) { @@ -1530,7 +1494,7 @@ gf100_grctx_generate(struct gf100_gr *gr, struct gf100_gr_chan *chan, struct nvk ); } - grctx->main(chan, &info); + grctx->main(chan); /* Trigger a context unload by unsetting the "next channel valid" bit * and faking a context switch interrupt. @@ -1582,6 +1546,8 @@ gf100_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h index 57b778e..99bb9af 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h @@ -3,28 +3,12 @@ #define __NVKM_GRCTX_NVC0_H__ #include "gf100.h" -struct gf100_grctx { - struct gf100_gr *gr; - struct gf100_gr_data *data; - struct gf100_gr_mmio *mmio; - int buffer_nr; - u64 buffer[4]; - u64 addr; -}; - -int gf100_grctx_mmio_data(struct gf100_grctx *, u32 size, u32 align, bool priv); -void gf100_grctx_mmio_item(struct gf100_grctx *, u32 addr, u32 data, int s, int); void gf100_grctx_patch_wr32(struct gf100_gr_chan *, u32 addr, u32 data); -#define mmio_vram(a,b,c,d) gf100_grctx_mmio_data((a), (b), (c), (d)) -#define mmio_refn(a,b,c,d,e) gf100_grctx_mmio_item((a), (b), (c), (d), (e)) -#define mmio_skip(a,b,c) mmio_refn((a), (b), (c), -1, -1) -#define mmio_wr32(a,b,c) mmio_refn((a), (b), (c), 0, -1) - struct gf100_grctx_func { void (*unkn88c)(struct gf100_gr *, bool on); /* main context generation function */ - void (*main)(struct gf100_gr_chan *, struct gf100_grctx *); + void (*main)(struct gf100_gr_chan *); /* context-specific modify-on-first-load list generation function */ void (*unkn)(struct gf100_gr *); /* mmio context data */ @@ -47,7 +31,9 @@ struct gf100_grctx_func { void (*pagepool)(struct gf100_gr_chan *, u64 addr); u32 pagepool_size; /* attribute(/alpha) circular buffer */ - void (*attrib)(struct gf100_grctx *); + u32 (*attrib_cb_size)(struct gf100_gr *); + void (*attrib_cb)(struct gf100_gr_chan *, u64 addr, u32 size); + void (*attrib)(struct gf100_gr_chan *); u32 attrib_nr_max; u32 attrib_nr; u32 alpha_nr_max; @@ -86,10 +72,12 @@ struct gf100_grctx_func { extern const struct gf100_grctx_func gf100_grctx; int gf100_grctx_generate(struct gf100_gr *, struct gf100_gr_chan *, struct nvkm_gpuobj *inst); -void gf100_grctx_generate_main(struct gf100_gr_chan *, struct gf100_grctx *); +void gf100_grctx_generate_main(struct gf100_gr_chan *); void gf100_grctx_generate_pagepool(struct gf100_gr_chan *, u64); void gf100_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32); -void gf100_grctx_generate_attrib(struct gf100_grctx *); +u32 gf100_grctx_generate_attrib_cb_size(struct gf100_gr *); +void gf100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32); +void gf100_grctx_generate_attrib(struct gf100_gr_chan *); void gf100_grctx_generate_unkn(struct gf100_gr *); void gf100_grctx_generate_floorsweep(struct gf100_gr *); void gf100_grctx_generate_sm_id(struct gf100_gr *, int, int, int); @@ -101,14 +89,14 @@ void gf100_grctx_generate_max_ways_evict(struct gf100_gr *); void gf100_grctx_generate_r419cb8(struct gf100_gr *); extern const struct gf100_grctx_func gf108_grctx; -void gf108_grctx_generate_attrib(struct gf100_grctx *); +void gf108_grctx_generate_attrib(struct gf100_gr_chan *); void gf108_grctx_generate_unkn(struct gf100_gr *); extern const struct gf100_grctx_func gf104_grctx; extern const struct gf100_grctx_func gf110_grctx; extern const struct gf100_grctx_func gf117_grctx; -void gf117_grctx_generate_attrib(struct gf100_grctx *); +void gf117_grctx_generate_attrib(struct gf100_gr_chan *); void gf117_grctx_generate_rop_mapping(struct gf100_gr *); void gf117_grctx_generate_dist_skip_table(struct gf100_gr *); @@ -134,7 +122,8 @@ extern const struct gf100_grctx_func gk208_grctx; extern const struct gf100_grctx_func gm107_grctx; void gm107_grctx_generate_pagepool(struct gf100_gr_chan *, u64); void gm107_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32); -void gm107_grctx_generate_attrib(struct gf100_grctx *); +void gm107_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32); +void gm107_grctx_generate_attrib(struct gf100_gr_chan *); void gm107_grctx_generate_sm_id(struct gf100_gr *, int, int, int); extern const struct gf100_grctx_func gm200_grctx; @@ -148,10 +137,12 @@ extern const struct gf100_grctx_func gm20b_grctx; extern const struct gf100_grctx_func gp100_grctx; void gp100_grctx_generate_pagepool(struct gf100_gr_chan *, u64); +void gp100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32); void gp100_grctx_generate_smid_config(struct gf100_gr *); extern const struct gf100_grctx_func gp102_grctx; -void gp102_grctx_generate_attrib(struct gf100_grctx *); +u32 gp102_grctx_generate_attrib_cb_size(struct gf100_gr *); +void gp102_grctx_generate_attrib(struct gf100_gr_chan *); extern const struct gf100_grctx_func gp104_grctx; @@ -163,7 +154,8 @@ extern const struct gf100_grctx_func tu102_grctx; void gv100_grctx_unkn88c(struct gf100_gr *, bool); void gv100_grctx_generate_unkn(struct gf100_gr *); extern const struct gf100_gr_init gv100_grctx_init_sw_veid_bundle_init_0[]; -void gv100_grctx_generate_attrib(struct gf100_grctx *); +void gv100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32); +void gv100_grctx_generate_attrib(struct gf100_gr_chan *); void gv100_grctx_generate_rop_mapping(struct gf100_gr *); void gv100_grctx_generate_r400088(struct gf100_gr *, bool); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c index 7a0564b..ba63a3b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c @@ -94,6 +94,8 @@ gf104_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c index dda2c32..0bc2eab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c @@ -733,25 +733,20 @@ gf108_grctx_pack_tpc[] = { ******************************************************************************/ void -gf108_grctx_generate_attrib(struct gf100_grctx *info) +gf108_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 beta = grctx->attrib_nr; - const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); const int timeslice_mode = 1; const int max_batches = 0xffff; u32 bo = 0; u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; int gpc, tpc; - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_wr32(info, 0x405830, (beta << 16) | alpha); - mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + gf100_grctx_patch_wr32(chan, 0x405830, (beta << 16) | alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { @@ -759,10 +754,10 @@ gf108_grctx_generate_attrib(struct gf100_grctx *info) const u32 b = beta; const u32 t = timeslice_mode; const u32 o = TPC_UNIT(gpc, tpc, 0x500); - mmio_skip(info, o + 0x20, (t << 28) | (b << 16) | ++bo); - mmio_wr32(info, o + 0x20, (t << 28) | (b << 16) | --bo); + + gf100_grctx_patch_wr32(chan, o + 0x20, (t << 28) | (b << 16) | bo); bo += grctx->attrib_nr_max; - mmio_wr32(info, o + 0x44, (a << 16) | ao); + gf100_grctx_patch_wr32(chan, o + 0x44, (a << 16) | ao); ao += grctx->alpha_nr_max; } } @@ -795,6 +790,8 @@ gf108_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf108_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c index f5cca5e..64b723b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c @@ -342,6 +342,8 @@ gf110_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c index 276c282..6c1f631 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c @@ -241,25 +241,20 @@ gf117_grctx_generate_rop_mapping(struct gf100_gr *gr) } void -gf117_grctx_generate_attrib(struct gf100_grctx *info) +gf117_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 beta = grctx->attrib_nr; - const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); const int timeslice_mode = 1; const int max_batches = 0xffff; u32 bo = 0; u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; int gpc, ppc; - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_wr32(info, 0x405830, (beta << 16) | alpha); - mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + gf100_grctx_patch_wr32(chan, 0x405830, (beta << 16) | alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) { @@ -267,12 +262,13 @@ gf117_grctx_generate_attrib(struct gf100_grctx *info) const u32 b = beta * gr->ppc_tpc_nr[gpc][ppc]; const u32 t = timeslice_mode; const u32 o = PPC_UNIT(gpc, ppc, 0); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_skip(info, o + 0xc0, (t << 28) | (b << 16) | ++bo); - mmio_wr32(info, o + 0xc0, (t << 28) | (b << 16) | --bo); + + gf100_grctx_patch_wr32(chan, o + 0xc0, (t << 28) | (b << 16) | bo); bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, o + 0xe4, (a << 16) | ao); + gf100_grctx_patch_wr32(chan, o + 0xe4, (a << 16) | ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; } } @@ -294,6 +290,8 @@ gf117_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c index 0cfe463..426ad1b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c @@ -510,6 +510,8 @@ gf119_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf108_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c index ec34740..94233d0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c @@ -981,6 +981,8 @@ gk104_grctx = { .bundle_token_limit = 0x600, .pagepool = gk104_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c index 86547cfc..4391458 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c @@ -838,6 +838,8 @@ gk110_grctx = { .bundle_token_limit = 0x7c0, .pagepool = gk104_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c index ebb947b..7b9a34f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c @@ -87,6 +87,8 @@ gk110b_grctx = { .bundle_token_limit = 0x600, .pagepool = gk104_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c index 4d40512..c78d07a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c @@ -553,6 +553,8 @@ gk208_grctx = { .bundle_token_limit = 0x200, .pagepool = gk104_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c index 2380ecc..ac5fdcb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c @@ -25,7 +25,7 @@ #include static void -gk20a_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info) +gk20a_grctx_generate_main(struct gf100_gr_chan *chan) { struct gf100_gr *gr = chan->gr; struct nvkm_device *device = gr->base.engine.subdev.device; @@ -39,7 +39,8 @@ gk20a_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info) idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - grctx->attrib(info); + grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr)); + grctx->attrib(chan); grctx->unkn(gr); @@ -75,6 +76,8 @@ gk20a_grctx = { .bundle_token_limit = 0x100, .pagepool = gk104_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x240, .attrib_nr = 0x240, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c index d968bcd..019c698 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c @@ -897,25 +897,19 @@ gm107_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr) } void -gm107_grctx_generate_attrib(struct gf100_grctx *info) +gm107_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; - const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); const int max_batches = 0xffff; u32 bo = 0; u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; int gpc, ppc, n = 0; - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_refn(info, 0x419c2c, 0x10000000, s, b); - mmio_wr32(info, 0x405830, (attrib << 16) | alpha); - mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + gf100_grctx_patch_wr32(chan, 0x405830, (attrib << 16) | alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { @@ -923,19 +917,29 @@ gm107_grctx_generate_attrib(struct gf100_grctx *info) const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc]; const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_wr32(info, o + 0xc0, bs); - mmio_wr32(info, o + 0xf4, bo); + + gf100_grctx_patch_wr32(chan, o + 0xc0, bs); + gf100_grctx_patch_wr32(chan, o + 0xf4, bo); bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, o + 0xe4, as); - mmio_wr32(info, o + 0xf8, ao); + gf100_grctx_patch_wr32(chan, o + 0xe4, as); + gf100_grctx_patch_wr32(chan, o + 0xf8, ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, u, ((bs / 3) << 16) | bs); + gf100_grctx_patch_wr32(chan, u, ((bs / 3) << 16) | bs); } } } +void +gm107_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size) +{ + gf100_grctx_generate_attrib_cb(chan, addr, size); + + gf100_grctx_patch_wr32(chan, 0x419c2c, 0x10000000 | addr >> 12); +} + static void gm107_grctx_generate_r406500(struct gf100_gr *gr) { @@ -969,6 +973,8 @@ gm107_grctx = { .bundle_token_limit = 0x2c0, .pagepool = gm107_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gm107_grctx_generate_attrib_cb, .attrib = gm107_grctx_generate_attrib, .attrib_nr_max = 0xff0, .attrib_nr = 0xaa0, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c index 013d05a..6b7034b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c @@ -111,6 +111,8 @@ gm200_grctx = { .bundle_token_limit = 0x780, .pagepool = gm107_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gm107_grctx_generate_attrib_cb, .attrib = gm107_grctx_generate_attrib, .attrib_nr_max = 0x600, .attrib_nr = 0x400, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c index cb02b02..b8edccf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c @@ -22,7 +22,7 @@ #include "ctxgf100.h" static void -gm20b_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info) +gm20b_grctx_generate_main(struct gf100_gr_chan *chan) { struct gf100_gr *gr = chan->gr; struct nvkm_device *device = gr->base.engine.subdev.device; @@ -36,7 +36,8 @@ gm20b_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info) idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - grctx->attrib(info); + grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr)); + grctx->attrib(chan); grctx->unkn(gr); @@ -78,6 +79,8 @@ gm20b_grctx = { .bundle_token_limit = 0x1c0, .pagepool = gm107_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gm107_grctx_generate_attrib_cb, .attrib = gm107_grctx_generate_attrib, .attrib_nr_max = 0x600, .attrib_nr = 0x400, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c index b2fa7c9..d8977ca 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c @@ -39,32 +39,21 @@ gp100_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr) } static void -gp100_grctx_generate_attrib(struct gf100_grctx *info) +gp100_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; - const int s = 12; const int max_batches = 0xffff; u32 size = grctx->alpha_nr_max * gr->tpc_total; u32 ao = 0; u32 bo = ao + size; - int gpc, ppc, b, n = 0; + int gpc, ppc, n = 0; - for (gpc = 0; gpc < gr->gpc_nr; gpc++) - size += grctx->attrib_nr_max * gr->ppc_nr[gpc] * gr->ppc_tpc_max; - size = ((size * 0x20) + 128) & ~127; - b = mmio_vram(info, size, (1 << s), false); - - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_refn(info, 0x419c2c, 0x10000000, s, b); - mmio_refn(info, 0x419b00, 0x00000000, s, b); - mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7); - mmio_wr32(info, 0x405830, attrib); - mmio_wr32(info, 0x40585c, alpha); - mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + gf100_grctx_patch_wr32(chan, 0x405830, attrib); + gf100_grctx_patch_wr32(chan, 0x40585c, alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { @@ -72,21 +61,45 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info) const u32 bs = attrib * gr->ppc_tpc_max; const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_wr32(info, o + 0xc0, bs); - mmio_wr32(info, o + 0xf4, bo); - mmio_wr32(info, o + 0xf0, bs); + + gf100_grctx_patch_wr32(chan, o + 0xc0, bs); + gf100_grctx_patch_wr32(chan, o + 0xf4, bo); + gf100_grctx_patch_wr32(chan, o + 0xf0, bs); bo += grctx->attrib_nr_max * gr->ppc_tpc_max; - mmio_wr32(info, o + 0xe4, as); - mmio_wr32(info, o + 0xf8, ao); + gf100_grctx_patch_wr32(chan, o + 0xe4, as); + gf100_grctx_patch_wr32(chan, o + 0xf8, ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, u, bs); + gf100_grctx_patch_wr32(chan, u, bs); } } - mmio_wr32(info, 0x418eec, 0x00000000); - mmio_wr32(info, 0x41befc, 0x00000000); + gf100_grctx_patch_wr32(chan, 0x418eec, 0x00000000); + gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000000); +} + +void +gp100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size) +{ + gm107_grctx_generate_attrib_cb(chan, addr, size); + + gf100_grctx_patch_wr32(chan, 0x419b00, 0x00000000 | addr >> 12); + gf100_grctx_patch_wr32(chan, 0x419b04, 0x80000000 | size >> 7); +} + +static u32 +gp100_grctx_generate_attrib_cb_size(struct gf100_gr *gr) +{ + const struct gf100_grctx_func *grctx = gr->func->grctx; + u32 size = grctx->alpha_nr_max * gr->tpc_total; + int gpc; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) + size += grctx->attrib_nr_max * gr->func->ppc_nr * gr->ppc_tpc_max; + + return ((size * 0x20) + 128) & ~127; } void @@ -120,6 +133,8 @@ gp100_grctx = { .bundle_token_limit = 0x1080, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gp100_grctx_generate_attrib_cb_size, + .attrib_cb = gp100_grctx_generate_attrib_cb, .attrib = gp100_grctx_generate_attrib, .attrib_nr_max = 0x660, .attrib_nr = 0x440, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c index daee17b..7d372d7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c @@ -37,33 +37,22 @@ gp102_grctx_generate_r408840(struct gf100_gr *gr) } void -gp102_grctx_generate_attrib(struct gf100_grctx *info) +gp102_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; const u32 gfxp = grctx->gfxp_nr; - const int s = 12; const int max_batches = 0xffff; u32 size = grctx->alpha_nr_max * gr->tpc_total; u32 ao = 0; u32 bo = ao + size; - int gpc, ppc, b, n = 0; + int gpc, ppc, n = 0; - for (gpc = 0; gpc < gr->gpc_nr; gpc++) - size += grctx->gfxp_nr * gr->ppc_nr[gpc] * gr->ppc_tpc_max; - size = ((size * 0x20) + 128) & ~127; - b = mmio_vram(info, size, (1 << s), false); - - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_refn(info, 0x419c2c, 0x10000000, s, b); - mmio_refn(info, 0x419b00, 0x00000000, s, b); - mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7); - mmio_wr32(info, 0x405830, attrib); - mmio_wr32(info, 0x40585c, alpha); - mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + gf100_grctx_patch_wr32(chan, 0x405830, attrib); + gf100_grctx_patch_wr32(chan, 0x40585c, alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { @@ -73,22 +62,37 @@ gp102_grctx_generate_attrib(struct gf100_grctx *info) const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); const u32 p = GPC_UNIT(gpc, 0xc44 + (ppc * 4)); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_wr32(info, o + 0xc0, gs); - mmio_wr32(info, p, bs); - mmio_wr32(info, o + 0xf4, bo); - mmio_wr32(info, o + 0xf0, bs); + + gf100_grctx_patch_wr32(chan, o + 0xc0, gs); + gf100_grctx_patch_wr32(chan, p, bs); + gf100_grctx_patch_wr32(chan, o + 0xf4, bo); + gf100_grctx_patch_wr32(chan, o + 0xf0, bs); bo += gs; - mmio_wr32(info, o + 0xe4, as); - mmio_wr32(info, o + 0xf8, ao); + gf100_grctx_patch_wr32(chan, o + 0xe4, as); + gf100_grctx_patch_wr32(chan, o + 0xf8, ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, u, bs); + gf100_grctx_patch_wr32(chan, u, bs); } } - mmio_wr32(info, 0x4181e4, 0x00000100); - mmio_wr32(info, 0x41befc, 0x00000100); + gf100_grctx_patch_wr32(chan, 0x4181e4, 0x00000100); + gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000100); +} + +u32 +gp102_grctx_generate_attrib_cb_size(struct gf100_gr *gr) +{ + const struct gf100_grctx_func *grctx = gr->func->grctx; + u32 size = grctx->alpha_nr_max * gr->tpc_total; + int gpc; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) + size += grctx->gfxp_nr * gr->func->ppc_nr * gr->ppc_tpc_max; + + return ((size * 0x20) + 127) & ~127; } const struct gf100_grctx_func @@ -101,6 +105,8 @@ gp102_grctx = { .bundle_token_limit = 0x900, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gp100_grctx_generate_attrib_cb, .attrib = gp102_grctx_generate_attrib, .attrib_nr_max = 0x4b0, .attrib_nr = 0x320, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c index 3b85e3d..90b5f79 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c @@ -31,6 +31,8 @@ gp104_grctx = { .bundle_token_limit = 0x900, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gp100_grctx_generate_attrib_cb, .attrib = gp102_grctx_generate_attrib, .attrib_nr_max = 0x4b0, .attrib_nr = 0x320, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c index 5060c5e..d191761 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c @@ -39,6 +39,8 @@ gp107_grctx = { .bundle_token_limit = 0x300, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gp100_grctx_generate_attrib_cb, .attrib = gp102_grctx_generate_attrib, .attrib_nr_max = 0x15de, .attrib_nr = 0x540, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c index 39553d5..c2e5d43 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c @@ -59,31 +59,20 @@ gv100_grctx_pack_sw_veid_bundle_init[] = { }; void -gv100_grctx_generate_attrib(struct gf100_grctx *info) +gv100_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; const u32 gfxp = grctx->gfxp_nr; - const int s = 12; u32 size = grctx->alpha_nr_max * gr->tpc_total; u32 ao = 0; u32 bo = ao + size; - int gpc, ppc, b, n = 0; + int gpc, ppc, n = 0; - for (gpc = 0; gpc < gr->gpc_nr; gpc++) - size += grctx->gfxp_nr * gr->ppc_nr[gpc] * gr->ppc_tpc_max; - size = ((size * 0x20) + 127) & ~127; - b = mmio_vram(info, size, (1 << s), false); - - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_refn(info, 0x419c2c, 0x10000000, s, b); - mmio_refn(info, 0x419e00, 0x00000000, s, b); - mmio_wr32(info, 0x419e04, 0x80000000 | size >> 7); - mmio_wr32(info, 0x405830, attrib); - mmio_wr32(info, 0x40585c, alpha); + gf100_grctx_patch_wr32(chan, 0x405830, attrib); + gf100_grctx_patch_wr32(chan, 0x40585c, alpha); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { @@ -92,21 +81,32 @@ gv100_grctx_generate_attrib(struct gf100_grctx *info) const u32 gs = gfxp * gr->ppc_tpc_max; const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_wr32(info, o + 0xc0, gs); - mmio_wr32(info, o + 0xf4, bo); - mmio_wr32(info, o + 0xf0, bs); + + gf100_grctx_patch_wr32(chan, o + 0xc0, gs); + gf100_grctx_patch_wr32(chan, o + 0xf4, bo); + gf100_grctx_patch_wr32(chan, o + 0xf0, bs); bo += gs; - mmio_wr32(info, o + 0xe4, as); - mmio_wr32(info, o + 0xf8, ao); + gf100_grctx_patch_wr32(chan, o + 0xe4, as); + gf100_grctx_patch_wr32(chan, o + 0xf8, ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, u, bs); + gf100_grctx_patch_wr32(chan, u, bs); } } - mmio_wr32(info, 0x4181e4, 0x00000100); - mmio_wr32(info, 0x41befc, 0x00000100); + gf100_grctx_patch_wr32(chan, 0x4181e4, 0x00000100); + gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000100); +} + +void +gv100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size) +{ + gm107_grctx_generate_attrib_cb(chan, addr, size); + + gf100_grctx_patch_wr32(chan, 0x419e00, 0x00000000 | addr >> 12); + gf100_grctx_patch_wr32(chan, 0x419e04, 0x80000000 | size >> 7); } void @@ -198,6 +198,8 @@ gv100_grctx = { .bundle_token_limit = 0x1680, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gv100_grctx_generate_attrib_cb, .attrib = gv100_grctx_generate_attrib, .attrib_nr_max = 0x6c0, .attrib_nr = 0x480, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c index 1abad38..1a151e8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c @@ -74,6 +74,8 @@ tu102_grctx = { .bundle_token_limit = 0xa80, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gv100_grctx_generate_attrib_cb, .attrib = gv100_grctx_generate_attrib, .attrib_nr_max = 0x800, .attrib_nr = 0x700, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 5d1e45b..09141b1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -355,16 +355,11 @@ static void * gf100_gr_chan_dtor(struct nvkm_object *object) { struct gf100_gr_chan *chan = gf100_gr_chan(object); - int i; - - for (i = 0; i < ARRAY_SIZE(chan->data); i++) { - nvkm_vmm_put(chan->vmm, &chan->data[i].vma); - nvkm_memory_unref(&chan->data[i].mem); - } nvkm_vmm_put(chan->vmm, &chan->mmio_vma); nvkm_memory_unref(&chan->mmio); + nvkm_vmm_put(chan->vmm, &chan->attrib_cb); nvkm_vmm_put(chan->vmm, &chan->unknown); nvkm_vmm_put(chan->vmm, &chan->bundle_cb); nvkm_vmm_put(chan->vmm, &chan->pagepool); @@ -384,12 +379,10 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, struct nvkm_object **pobject) { struct gf100_gr *gr = gf100_gr(base); - struct gf100_gr_data *data = gr->mmio_data; - struct gf100_gr_mmio *mmio = gr->mmio_list; struct gf100_gr_chan *chan; struct gf100_vmm_map_v0 args = { .priv = 1 }; struct nvkm_device *device = gr->base.engine.subdev.device; - int ret, i; + int ret; if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) return -ENOMEM; @@ -416,6 +409,22 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, if (ret) return ret; + /* Map attribute circular buffer. */ + ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->attrib_cb), &chan->attrib_cb); + if (ret) + return ret; + + if (device->card_type < GP100) { + ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb, NULL, 0); + if (ret) + return ret; + } else { + ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb, + &args, sizeof(args));; + if (ret) + return ret; + } + /* Map some context buffer of unknown purpose. */ if (gr->func->grctx->unknown_size) { ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->unknown), &chan->unknown); @@ -457,47 +466,12 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, if (ret) return ret; - /* allocate buffers referenced by mmio list */ - for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) { - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, - data->size, data->align, false, - &chan->data[i].mem); - if (ret) - return ret; - - ret = nvkm_vmm_get(fifoch->vmm, 12, - nvkm_memory_size(chan->data[i].mem), - &chan->data[i].vma); - if (ret) - return ret; - - args.priv = data->priv; - - ret = nvkm_memory_map(chan->data[i].mem, 0, chan->vmm, - chan->data[i].vma, &args, sizeof(args)); - if (ret) - return ret; - - data++; - } - /* finally, fill in the mmio list and point the context at it */ nvkm_kmap(chan->mmio); gr->func->grctx->pagepool(chan, chan->pagepool->addr); gr->func->grctx->bundle(chan, chan->bundle_cb->addr, gr->func->grctx->bundle_size); - for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) { - u32 addr = mmio->addr; - u32 data = mmio->data; - - if (mmio->buffer >= 0) { - u64 info = chan->data[mmio->buffer].vma->addr; - data |= info >> mmio->shift; - } - - nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr); - nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data); - mmio++; - } + gr->func->grctx->attrib_cb(chan, chan->attrib_cb->addr, gr->func->grctx->attrib_cb_size(gr)); + gr->func->grctx->attrib(chan); if (gr->func->grctx->patch_ltc) gr->func->grctx->patch_ltc(chan); if (gr->func->grctx->unknown_size) @@ -2015,6 +1989,11 @@ gf100_gr_oneinit(struct nvkm_gr *base) if (ret) return ret; + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->attrib_cb_size(gr), + 0x1000, false, &gr->attrib_cb); + if (ret) + return ret; + if (gr->func->grctx->unknown_size) { ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->unknown_size, 0x100, false, &gr->unknown); @@ -2092,6 +2071,7 @@ gf100_gr_dtor(struct nvkm_gr *base) kfree(gr->data); nvkm_memory_unref(&gr->unknown); + nvkm_memory_unref(&gr->attrib_cb); nvkm_memory_unref(&gr->bundle_cb); nvkm_memory_unref(&gr->pagepool); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index ae3eee65..7f467f6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -44,19 +44,6 @@ struct nvkm_acr_lsfw; #define PPC_UNIT(t, m, r) (0x503000 + (t) * 0x8000 + (m) * 0x200 + (r)) #define TPC_UNIT(t, m, r) (0x504000 + (t) * 0x8000 + (m) * 0x800 + (r)) -struct gf100_gr_data { - u32 size; - u32 align; - bool priv; -}; - -struct gf100_gr_mmio { - u32 addr; - u32 data; - u32 shift; - int buffer; -}; - struct gf100_gr_zbc_color { u32 format; u32 ds[4]; @@ -123,6 +110,7 @@ struct gf100_gr { struct nvkm_memory *pagepool; struct nvkm_memory *bundle_cb; + struct nvkm_memory *attrib_cb; struct nvkm_memory *unknown; u8 screen_tile_row_offset; @@ -134,8 +122,6 @@ struct gf100_gr { } sm[TPC_MAX]; u8 sm_nr; - struct gf100_gr_data mmio_data[4]; - struct gf100_gr_mmio mmio_list[4096/8]; u32 size; u32 *data; u32 size_zcull; @@ -264,16 +250,12 @@ struct gf100_gr_chan { struct nvkm_vma *pagepool; struct nvkm_vma *bundle_cb; + struct nvkm_vma *attrib_cb; struct nvkm_vma *unknown; struct nvkm_memory *mmio; struct nvkm_vma *mmio_vma; int mmio_nr; - - struct { - struct nvkm_memory *mem; - struct nvkm_vma *vma; - } data[4]; }; void gf100_gr_ctxctl_debug(struct gf100_gr *); -- 2.7.4