From 78a43c7e3b2ff5aed1809f93b4f87a418355789e Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Wed, 1 Jun 2022 20:48:07 +1000
Subject: [PATCH] drm/nouveau/gr/gf100-: make global attrib_cb actually global

This was thought to be per-channel initially - it's not.  The backing
pages for the VMM mappings are shared for all channels.

- switches to more straight-forward patch interfaces
- prepares for sub-context support
- this is saving a *sizeable* amount of vram

v2:
- whitespace

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c  | 86 +++++++---------------
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h  | 42 +++++------
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c  |  2 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c  | 21 +++---
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c  |  2 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c  | 22 +++---
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c  |  2 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c  |  2 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c  |  2 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c |  2 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c  |  2 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c  |  7 +-
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c  | 36 +++++----
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c  |  2 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c  |  7 +-
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c  | 65 +++++++++-------
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c  | 58 ++++++++-------
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c  |  2 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c  |  2 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c  | 50 +++++++------
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c  |  2 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c     | 72 +++++++-----------
 drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h     | 22 +-----
 23 files changed, 241 insertions(+), 269 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
index 076861e..332590f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
@@ -1003,45 +1003,6 @@ gf100_grctx_patch_wr32(struct gf100_gr_chan *chan, u32 addr, u32 data)
 	nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
 }
 
-int
-gf100_grctx_mmio_data(struct gf100_grctx *info, u32 size, u32 align, bool priv)
-{
-	if (info->data) {
-		info->buffer[info->buffer_nr] = round_up(info->addr, align);
-		info->addr = info->buffer[info->buffer_nr] + size;
-		info->data->size = size;
-		info->data->align = align;
-		info->data->priv = priv;
-		info->data++;
-		return info->buffer_nr++;
-	}
-	return -1;
-}
-
-void
-gf100_grctx_mmio_item(struct gf100_grctx *info, u32 addr, u32 data,
-		      int shift, int buffer)
-{
-	struct nvkm_device *device = info->gr->base.engine.subdev.device;
-	if (info->data) {
-		if (shift >= 0) {
-			info->mmio->addr = addr;
-			info->mmio->data = data;
-			info->mmio->shift = shift;
-			info->mmio->buffer = buffer;
-			if (buffer >= 0)
-				data |= info->buffer[buffer] >> shift;
-			info->mmio++;
-		} else
-			return;
-	} else {
-		if (buffer >= 0)
-			return;
-	}
-
-	nvkm_wr32(device, addr, data);
-}
-
 void
 gf100_grctx_generate_r419cb8(struct gf100_gr *gr)
 {
@@ -1068,32 +1029,42 @@ gf100_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr)
 }
 
 void
-gf100_grctx_generate_attrib(struct gf100_grctx *info)
+gf100_grctx_generate_attrib(struct gf100_gr_chan *chan)
 {
-	struct gf100_gr *gr = info->gr;
+	struct gf100_gr *gr = chan->gr;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	const u32 attrib = grctx->attrib_nr;
-	const u32   size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
-	const int s = 12;
-	const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false);
 	int gpc, tpc;
 	u32 bo = 0;
 
-	mmio_refn(info, 0x418810, 0x80000000, s, b);
-	mmio_refn(info, 0x419848, 0x10000000, s, b);
-	mmio_wr32(info, 0x405830, (attrib << 16));
+	gf100_grctx_patch_wr32(chan, 0x405830, (attrib << 16));
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
 			const u32 o = TPC_UNIT(gpc, tpc, 0x0520);
-			mmio_skip(info, o, (attrib << 16) | ++bo);
-			mmio_wr32(info, o, (attrib << 16) | --bo);
+
+			gf100_grctx_patch_wr32(chan, o, (attrib << 16) | bo);
 			bo += grctx->attrib_nr_max;
 		}
 	}
 }
 
 void
+gf100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size)
+{
+	gf100_grctx_patch_wr32(chan, 0x418810, 0x80000000 | addr >> 12);
+	gf100_grctx_patch_wr32(chan, 0x419848, 0x10000000 | addr >> 12);
+}
+
+u32
+gf100_grctx_generate_attrib_cb_size(struct gf100_gr *gr)
+{
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+
+	return 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max) * gr->tpc_total;
+}
+
+void
 gf100_grctx_generate_unkn(struct gf100_gr *gr)
 {
 }
@@ -1368,7 +1339,7 @@ gf100_grctx_generate_floorsweep(struct gf100_gr *gr)
 }
 
 void
-gf100_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info)
+gf100_grctx_generate_main(struct gf100_gr_chan *chan)
 {
 	struct gf100_gr *gr = chan->gr;
 	struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -1394,7 +1365,8 @@ gf100_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info)
 
 	grctx->pagepool(chan, chan->pagepool->addr);
 	grctx->bundle(chan, chan->bundle_cb->addr, grctx->bundle_size);
-	grctx->attrib(info);
+	grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr));
+	grctx->attrib(chan);
 	if (grctx->patch_ltc)
 		grctx->patch_ltc(chan);
 	if (grctx->unknown_size)
@@ -1450,7 +1422,6 @@ gf100_grctx_generate(struct gf100_gr *gr, struct gf100_gr_chan *chan, struct nvk
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_memory *data = NULL;
 	struct nvkm_vma *ctx = NULL;
-	struct gf100_grctx info;
 	int ret, i;
 	u64 addr;
 
@@ -1500,13 +1471,6 @@ gf100_grctx_generate(struct gf100_gr *gr, struct gf100_gr_chan *chan, struct nvk
 	nvkm_wo32(inst, 0x0214, upper_32_bits(ctx->addr + CB_RESERVED));
 	nvkm_done(inst);
 
-	/* Setup default state for mmio list construction. */
-	info.gr = gr;
-	info.data = gr->mmio_data;
-	info.mmio = gr->mmio_list;
-	info.addr = ctx->addr;
-	info.buffer_nr = 0;
-
 	/* Make channel current. */
 	addr = inst->addr >> 12;
 	if (gr->firmware) {
@@ -1530,7 +1494,7 @@ gf100_grctx_generate(struct gf100_gr *gr, struct gf100_gr_chan *chan, struct nvk
 		);
 	}
 
-	grctx->main(chan, &info);
+	grctx->main(chan);
 
 	/* Trigger a context unload by unsetting the "next channel valid" bit
 	 * and faking a context switch interrupt.
@@ -1582,6 +1546,8 @@ gf100_grctx = {
 	.bundle_size = 0x1800,
 	.pagepool = gf100_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf100_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
index 57b778e..99bb9af 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
@@ -3,28 +3,12 @@
 #define __NVKM_GRCTX_NVC0_H__
 #include "gf100.h"
 
-struct gf100_grctx {
-	struct gf100_gr *gr;
-	struct gf100_gr_data *data;
-	struct gf100_gr_mmio *mmio;
-	int buffer_nr;
-	u64 buffer[4];
-	u64 addr;
-};
-
-int  gf100_grctx_mmio_data(struct gf100_grctx *, u32 size, u32 align, bool priv);
-void gf100_grctx_mmio_item(struct gf100_grctx *, u32 addr, u32 data, int s, int);
 void gf100_grctx_patch_wr32(struct gf100_gr_chan *, u32 addr, u32 data);
 
-#define mmio_vram(a,b,c,d) gf100_grctx_mmio_data((a), (b), (c), (d))
-#define mmio_refn(a,b,c,d,e) gf100_grctx_mmio_item((a), (b), (c), (d), (e))
-#define mmio_skip(a,b,c) mmio_refn((a), (b), (c), -1, -1)
-#define mmio_wr32(a,b,c) mmio_refn((a), (b), (c),  0, -1)
-
 struct gf100_grctx_func {
 	void (*unkn88c)(struct gf100_gr *, bool on);
 	/* main context generation function */
-	void  (*main)(struct gf100_gr_chan *, struct gf100_grctx *);
+	void  (*main)(struct gf100_gr_chan *);
 	/* context-specific modify-on-first-load list generation function */
 	void  (*unkn)(struct gf100_gr *);
 	/* mmio context data */
@@ -47,7 +31,9 @@ struct gf100_grctx_func {
 	void (*pagepool)(struct gf100_gr_chan *, u64 addr);
 	u32 pagepool_size;
 	/* attribute(/alpha) circular buffer */
-	void (*attrib)(struct gf100_grctx *);
+	u32 (*attrib_cb_size)(struct gf100_gr *);
+	void (*attrib_cb)(struct gf100_gr_chan *, u64 addr, u32 size);
+	void (*attrib)(struct gf100_gr_chan *);
 	u32 attrib_nr_max;
 	u32 attrib_nr;
 	u32 alpha_nr_max;
@@ -86,10 +72,12 @@ struct gf100_grctx_func {
 
 extern const struct gf100_grctx_func gf100_grctx;
 int  gf100_grctx_generate(struct gf100_gr *, struct gf100_gr_chan *, struct nvkm_gpuobj *inst);
-void gf100_grctx_generate_main(struct gf100_gr_chan *, struct gf100_grctx *);
+void gf100_grctx_generate_main(struct gf100_gr_chan *);
 void gf100_grctx_generate_pagepool(struct gf100_gr_chan *, u64);
 void gf100_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32);
-void gf100_grctx_generate_attrib(struct gf100_grctx *);
+u32 gf100_grctx_generate_attrib_cb_size(struct gf100_gr *);
+void gf100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32);
+void gf100_grctx_generate_attrib(struct gf100_gr_chan *);
 void gf100_grctx_generate_unkn(struct gf100_gr *);
 void gf100_grctx_generate_floorsweep(struct gf100_gr *);
 void gf100_grctx_generate_sm_id(struct gf100_gr *, int, int, int);
@@ -101,14 +89,14 @@ void gf100_grctx_generate_max_ways_evict(struct gf100_gr *);
 void gf100_grctx_generate_r419cb8(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gf108_grctx;
-void gf108_grctx_generate_attrib(struct gf100_grctx *);
+void gf108_grctx_generate_attrib(struct gf100_gr_chan *);
 void gf108_grctx_generate_unkn(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gf104_grctx;
 extern const struct gf100_grctx_func gf110_grctx;
 
 extern const struct gf100_grctx_func gf117_grctx;
-void gf117_grctx_generate_attrib(struct gf100_grctx *);
+void gf117_grctx_generate_attrib(struct gf100_gr_chan *);
 void gf117_grctx_generate_rop_mapping(struct gf100_gr *);
 void gf117_grctx_generate_dist_skip_table(struct gf100_gr *);
 
@@ -134,7 +122,8 @@ extern const struct gf100_grctx_func gk208_grctx;
 extern const struct gf100_grctx_func gm107_grctx;
 void gm107_grctx_generate_pagepool(struct gf100_gr_chan *, u64);
 void gm107_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32);
-void gm107_grctx_generate_attrib(struct gf100_grctx *);
+void gm107_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32);
+void gm107_grctx_generate_attrib(struct gf100_gr_chan *);
 void gm107_grctx_generate_sm_id(struct gf100_gr *, int, int, int);
 
 extern const struct gf100_grctx_func gm200_grctx;
@@ -148,10 +137,12 @@ extern const struct gf100_grctx_func gm20b_grctx;
 
 extern const struct gf100_grctx_func gp100_grctx;
 void gp100_grctx_generate_pagepool(struct gf100_gr_chan *, u64);
+void gp100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32);
 void gp100_grctx_generate_smid_config(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gp102_grctx;
-void gp102_grctx_generate_attrib(struct gf100_grctx *);
+u32 gp102_grctx_generate_attrib_cb_size(struct gf100_gr *);
+void gp102_grctx_generate_attrib(struct gf100_gr_chan *);
 
 extern const struct gf100_grctx_func gp104_grctx;
 
@@ -163,7 +154,8 @@ extern const struct gf100_grctx_func tu102_grctx;
 void gv100_grctx_unkn88c(struct gf100_gr *, bool);
 void gv100_grctx_generate_unkn(struct gf100_gr *);
 extern const struct gf100_gr_init gv100_grctx_init_sw_veid_bundle_init_0[];
-void gv100_grctx_generate_attrib(struct gf100_grctx *);
+void gv100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32);
+void gv100_grctx_generate_attrib(struct gf100_gr_chan *);
 void gv100_grctx_generate_rop_mapping(struct gf100_gr *);
 void gv100_grctx_generate_r400088(struct gf100_gr *, bool);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c
index 7a0564b..ba63a3b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c
@@ -94,6 +94,8 @@ gf104_grctx = {
 	.bundle_size = 0x1800,
 	.pagepool = gf100_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf100_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c
index dda2c32..0bc2eab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c
@@ -733,25 +733,20 @@ gf108_grctx_pack_tpc[] = {
  ******************************************************************************/
 
 void
-gf108_grctx_generate_attrib(struct gf100_grctx *info)
+gf108_grctx_generate_attrib(struct gf100_gr_chan *chan)
 {
-	struct gf100_gr *gr = info->gr;
+	struct gf100_gr *gr = chan->gr;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	const u32  alpha = grctx->alpha_nr;
 	const u32   beta = grctx->attrib_nr;
-	const u32   size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
-	const int s = 12;
-	const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false);
 	const int timeslice_mode = 1;
 	const int max_batches = 0xffff;
 	u32 bo = 0;
 	u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total;
 	int gpc, tpc;
 
-	mmio_refn(info, 0x418810, 0x80000000, s, b);
-	mmio_refn(info, 0x419848, 0x10000000, s, b);
-	mmio_wr32(info, 0x405830, (beta << 16) | alpha);
-	mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
+	gf100_grctx_patch_wr32(chan, 0x405830, (beta << 16) | alpha);
+	gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
@@ -759,10 +754,10 @@ gf108_grctx_generate_attrib(struct gf100_grctx *info)
 			const u32 b =  beta;
 			const u32 t = timeslice_mode;
 			const u32 o = TPC_UNIT(gpc, tpc, 0x500);
-			mmio_skip(info, o + 0x20, (t << 28) | (b << 16) | ++bo);
-			mmio_wr32(info, o + 0x20, (t << 28) | (b << 16) | --bo);
+
+			gf100_grctx_patch_wr32(chan, o + 0x20, (t << 28) | (b << 16) | bo);
 			bo += grctx->attrib_nr_max;
-			mmio_wr32(info, o + 0x44, (a << 16) | ao);
+			gf100_grctx_patch_wr32(chan, o + 0x44, (a << 16) | ao);
 			ao += grctx->alpha_nr_max;
 		}
 	}
@@ -795,6 +790,8 @@ gf108_grctx = {
 	.bundle_size = 0x1800,
 	.pagepool = gf100_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf108_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c
index f5cca5e..64b723b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c
@@ -342,6 +342,8 @@ gf110_grctx = {
 	.bundle_size = 0x1800,
 	.pagepool = gf100_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf100_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
index 276c282..6c1f631 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
@@ -241,25 +241,20 @@ gf117_grctx_generate_rop_mapping(struct gf100_gr *gr)
 }
 
 void
-gf117_grctx_generate_attrib(struct gf100_grctx *info)
+gf117_grctx_generate_attrib(struct gf100_gr_chan *chan)
 {
-	struct gf100_gr *gr = info->gr;
+	struct gf100_gr *gr = chan->gr;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	const u32  alpha = grctx->alpha_nr;
 	const u32   beta = grctx->attrib_nr;
-	const u32   size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
-	const int s = 12;
-	const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false);
 	const int timeslice_mode = 1;
 	const int max_batches = 0xffff;
 	u32 bo = 0;
 	u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total;
 	int gpc, ppc;
 
-	mmio_refn(info, 0x418810, 0x80000000, s, b);
-	mmio_refn(info, 0x419848, 0x10000000, s, b);
-	mmio_wr32(info, 0x405830, (beta << 16) | alpha);
-	mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
+	gf100_grctx_patch_wr32(chan, 0x405830, (beta << 16) | alpha);
+	gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) {
@@ -267,12 +262,13 @@ gf117_grctx_generate_attrib(struct gf100_grctx *info)
 			const u32 b =  beta * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 t = timeslice_mode;
 			const u32 o = PPC_UNIT(gpc, ppc, 0);
+
 			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
 				continue;
-			mmio_skip(info, o + 0xc0, (t << 28) | (b << 16) | ++bo);
-			mmio_wr32(info, o + 0xc0, (t << 28) | (b << 16) | --bo);
+
+			gf100_grctx_patch_wr32(chan, o + 0xc0, (t << 28) | (b << 16) | bo);
 			bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc];
-			mmio_wr32(info, o + 0xe4, (a << 16) | ao);
+			gf100_grctx_patch_wr32(chan, o + 0xe4, (a << 16) | ao);
 			ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
 		}
 	}
@@ -294,6 +290,8 @@ gf117_grctx = {
 	.bundle_size = 0x1800,
 	.pagepool = gf100_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf117_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c
index 0cfe463..426ad1b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c
@@ -510,6 +510,8 @@ gf119_grctx = {
 	.bundle_size = 0x1800,
 	.pagepool = gf100_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf108_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
index ec34740..94233d0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
@@ -981,6 +981,8 @@ gk104_grctx = {
 	.bundle_token_limit = 0x600,
 	.pagepool = gk104_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf117_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
index 86547cfc..4391458 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
@@ -838,6 +838,8 @@ gk110_grctx = {
 	.bundle_token_limit = 0x7c0,
 	.pagepool = gk104_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf117_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
index ebb947b..7b9a34f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
@@ -87,6 +87,8 @@ gk110b_grctx = {
 	.bundle_token_limit = 0x600,
 	.pagepool = gk104_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf117_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
index 4d40512..c78d07a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
@@ -553,6 +553,8 @@ gk208_grctx = {
 	.bundle_token_limit = 0x200,
 	.pagepool = gk104_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf117_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
index 2380ecc..ac5fdcb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
@@ -25,7 +25,7 @@
 #include <subdev/mc.h>
 
 static void
-gk20a_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info)
+gk20a_grctx_generate_main(struct gf100_gr_chan *chan)
 {
 	struct gf100_gr *gr = chan->gr;
 	struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -39,7 +39,8 @@ gk20a_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info)
 
 	idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
 
-	grctx->attrib(info);
+	grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr));
+	grctx->attrib(chan);
 
 	grctx->unkn(gr);
 
@@ -75,6 +76,8 @@ gk20a_grctx = {
 	.bundle_token_limit = 0x100,
 	.pagepool = gk104_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf117_grctx_generate_attrib,
 	.attrib_nr_max = 0x240,
 	.attrib_nr = 0x240,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
index d968bcd..019c698 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
@@ -897,25 +897,19 @@ gm107_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr)
 }
 
 void
-gm107_grctx_generate_attrib(struct gf100_grctx *info)
+gm107_grctx_generate_attrib(struct gf100_gr_chan *chan)
 {
-	struct gf100_gr *gr = info->gr;
+	struct gf100_gr *gr = chan->gr;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	const u32  alpha = grctx->alpha_nr;
 	const u32 attrib = grctx->attrib_nr;
-	const u32   size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
-	const int s = 12;
-	const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false);
 	const int max_batches = 0xffff;
 	u32 bo = 0;
 	u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total;
 	int gpc, ppc, n = 0;
 
-	mmio_refn(info, 0x418810, 0x80000000, s, b);
-	mmio_refn(info, 0x419848, 0x10000000, s, b);
-	mmio_refn(info, 0x419c2c, 0x10000000, s, b);
-	mmio_wr32(info, 0x405830, (attrib << 16) | alpha);
-	mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
+	gf100_grctx_patch_wr32(chan, 0x405830, (attrib << 16) | alpha);
+	gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
@@ -923,19 +917,29 @@ gm107_grctx_generate_attrib(struct gf100_grctx *info)
 			const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 u = 0x418ea0 + (n * 0x04);
 			const u32 o = PPC_UNIT(gpc, ppc, 0);
+
 			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
 				continue;
-			mmio_wr32(info, o + 0xc0, bs);
-			mmio_wr32(info, o + 0xf4, bo);
+
+			gf100_grctx_patch_wr32(chan, o + 0xc0, bs);
+			gf100_grctx_patch_wr32(chan, o + 0xf4, bo);
 			bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc];
-			mmio_wr32(info, o + 0xe4, as);
-			mmio_wr32(info, o + 0xf8, ao);
+			gf100_grctx_patch_wr32(chan, o + 0xe4, as);
+			gf100_grctx_patch_wr32(chan, o + 0xf8, ao);
 			ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
-			mmio_wr32(info, u, ((bs / 3) << 16) | bs);
+			gf100_grctx_patch_wr32(chan, u, ((bs / 3) << 16) | bs);
 		}
 	}
 }
 
+void
+gm107_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size)
+{
+	gf100_grctx_generate_attrib_cb(chan, addr, size);
+
+	gf100_grctx_patch_wr32(chan, 0x419c2c, 0x10000000 | addr >> 12);
+}
+
 static void
 gm107_grctx_generate_r406500(struct gf100_gr *gr)
 {
@@ -969,6 +973,8 @@ gm107_grctx = {
 	.bundle_token_limit = 0x2c0,
 	.pagepool = gm107_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gm107_grctx_generate_attrib_cb,
 	.attrib = gm107_grctx_generate_attrib,
 	.attrib_nr_max = 0xff0,
 	.attrib_nr = 0xaa0,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
index 013d05a..6b7034b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
@@ -111,6 +111,8 @@ gm200_grctx = {
 	.bundle_token_limit = 0x780,
 	.pagepool = gm107_grctx_generate_pagepool,
 	.pagepool_size = 0x20000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gm107_grctx_generate_attrib_cb,
 	.attrib = gm107_grctx_generate_attrib,
 	.attrib_nr_max = 0x600,
 	.attrib_nr = 0x400,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
index cb02b02..b8edccf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
@@ -22,7 +22,7 @@
 #include "ctxgf100.h"
 
 static void
-gm20b_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info)
+gm20b_grctx_generate_main(struct gf100_gr_chan *chan)
 {
 	struct gf100_gr *gr = chan->gr;
 	struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -36,7 +36,8 @@ gm20b_grctx_generate_main(struct gf100_gr_chan *chan, struct gf100_grctx *info)
 
 	idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
 
-	grctx->attrib(info);
+	grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr));
+	grctx->attrib(chan);
 
 	grctx->unkn(gr);
 
@@ -78,6 +79,8 @@ gm20b_grctx = {
 	.bundle_token_limit = 0x1c0,
 	.pagepool = gm107_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gm107_grctx_generate_attrib_cb,
 	.attrib = gm107_grctx_generate_attrib,
 	.attrib_nr_max = 0x600,
 	.attrib_nr = 0x400,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
index b2fa7c9..d8977ca 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
@@ -39,32 +39,21 @@ gp100_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr)
 }
 
 static void
-gp100_grctx_generate_attrib(struct gf100_grctx *info)
+gp100_grctx_generate_attrib(struct gf100_gr_chan *chan)
 {
-	struct gf100_gr *gr = info->gr;
+	struct gf100_gr *gr = chan->gr;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	const u32  alpha = grctx->alpha_nr;
 	const u32 attrib = grctx->attrib_nr;
-	const int s = 12;
 	const int max_batches = 0xffff;
 	u32 size = grctx->alpha_nr_max * gr->tpc_total;
 	u32 ao = 0;
 	u32 bo = ao + size;
-	int gpc, ppc, b, n = 0;
+	int gpc, ppc, n = 0;
 
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++)
-		size += grctx->attrib_nr_max * gr->ppc_nr[gpc] * gr->ppc_tpc_max;
-	size = ((size * 0x20) + 128) & ~127;
-	b = mmio_vram(info, size, (1 << s), false);
-
-	mmio_refn(info, 0x418810, 0x80000000, s, b);
-	mmio_refn(info, 0x419848, 0x10000000, s, b);
-	mmio_refn(info, 0x419c2c, 0x10000000, s, b);
-	mmio_refn(info, 0x419b00, 0x00000000, s, b);
-	mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7);
-	mmio_wr32(info, 0x405830, attrib);
-	mmio_wr32(info, 0x40585c, alpha);
-	mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
+	gf100_grctx_patch_wr32(chan, 0x405830, attrib);
+	gf100_grctx_patch_wr32(chan, 0x40585c, alpha);
+	gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
@@ -72,21 +61,45 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info)
 			const u32 bs = attrib * gr->ppc_tpc_max;
 			const u32 u = 0x418ea0 + (n * 0x04);
 			const u32 o = PPC_UNIT(gpc, ppc, 0);
+
 			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
 				continue;
-			mmio_wr32(info, o + 0xc0, bs);
-			mmio_wr32(info, o + 0xf4, bo);
-			mmio_wr32(info, o + 0xf0, bs);
+
+			gf100_grctx_patch_wr32(chan, o + 0xc0, bs);
+			gf100_grctx_patch_wr32(chan, o + 0xf4, bo);
+			gf100_grctx_patch_wr32(chan, o + 0xf0, bs);
 			bo += grctx->attrib_nr_max * gr->ppc_tpc_max;
-			mmio_wr32(info, o + 0xe4, as);
-			mmio_wr32(info, o + 0xf8, ao);
+			gf100_grctx_patch_wr32(chan, o + 0xe4, as);
+			gf100_grctx_patch_wr32(chan, o + 0xf8, ao);
 			ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
-			mmio_wr32(info, u, bs);
+			gf100_grctx_patch_wr32(chan, u, bs);
 		}
 	}
 
-	mmio_wr32(info, 0x418eec, 0x00000000);
-	mmio_wr32(info, 0x41befc, 0x00000000);
+	gf100_grctx_patch_wr32(chan, 0x418eec, 0x00000000);
+	gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000000);
+}
+
+void
+gp100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size)
+{
+	gm107_grctx_generate_attrib_cb(chan, addr, size);
+
+	gf100_grctx_patch_wr32(chan, 0x419b00, 0x00000000 | addr >> 12);
+	gf100_grctx_patch_wr32(chan, 0x419b04, 0x80000000 | size >> 7);
+}
+
+static u32
+gp100_grctx_generate_attrib_cb_size(struct gf100_gr *gr)
+{
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+	u32 size = grctx->alpha_nr_max * gr->tpc_total;
+	int gpc;
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++)
+		size += grctx->attrib_nr_max * gr->func->ppc_nr * gr->ppc_tpc_max;
+
+	return ((size * 0x20) + 128) & ~127;
 }
 
 void
@@ -120,6 +133,8 @@ gp100_grctx = {
 	.bundle_token_limit = 0x1080,
 	.pagepool = gp100_grctx_generate_pagepool,
 	.pagepool_size = 0x20000,
+	.attrib_cb_size = gp100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gp100_grctx_generate_attrib_cb,
 	.attrib = gp100_grctx_generate_attrib,
 	.attrib_nr_max = 0x660,
 	.attrib_nr = 0x440,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c
index daee17b..7d372d7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c
@@ -37,33 +37,22 @@ gp102_grctx_generate_r408840(struct gf100_gr *gr)
 }
 
 void
-gp102_grctx_generate_attrib(struct gf100_grctx *info)
+gp102_grctx_generate_attrib(struct gf100_gr_chan *chan)
 {
-	struct gf100_gr *gr = info->gr;
+	struct gf100_gr *gr = chan->gr;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	const u32  alpha = grctx->alpha_nr;
 	const u32 attrib = grctx->attrib_nr;
 	const u32   gfxp = grctx->gfxp_nr;
-	const int s = 12;
 	const int max_batches = 0xffff;
 	u32 size = grctx->alpha_nr_max * gr->tpc_total;
 	u32 ao = 0;
 	u32 bo = ao + size;
-	int gpc, ppc, b, n = 0;
+	int gpc, ppc, n = 0;
 
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++)
-		size += grctx->gfxp_nr * gr->ppc_nr[gpc] * gr->ppc_tpc_max;
-	size = ((size * 0x20) + 128) & ~127;
-	b = mmio_vram(info, size, (1 << s), false);
-
-	mmio_refn(info, 0x418810, 0x80000000, s, b);
-	mmio_refn(info, 0x419848, 0x10000000, s, b);
-	mmio_refn(info, 0x419c2c, 0x10000000, s, b);
-	mmio_refn(info, 0x419b00, 0x00000000, s, b);
-	mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7);
-	mmio_wr32(info, 0x405830, attrib);
-	mmio_wr32(info, 0x40585c, alpha);
-	mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
+	gf100_grctx_patch_wr32(chan, 0x405830, attrib);
+	gf100_grctx_patch_wr32(chan, 0x40585c, alpha);
+	gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
@@ -73,22 +62,37 @@ gp102_grctx_generate_attrib(struct gf100_grctx *info)
 			const u32 u = 0x418ea0 + (n * 0x04);
 			const u32 o = PPC_UNIT(gpc, ppc, 0);
 			const u32 p = GPC_UNIT(gpc, 0xc44 + (ppc * 4));
+
 			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
 				continue;
-			mmio_wr32(info, o + 0xc0, gs);
-			mmio_wr32(info, p, bs);
-			mmio_wr32(info, o + 0xf4, bo);
-			mmio_wr32(info, o + 0xf0, bs);
+
+			gf100_grctx_patch_wr32(chan, o + 0xc0, gs);
+			gf100_grctx_patch_wr32(chan, p, bs);
+			gf100_grctx_patch_wr32(chan, o + 0xf4, bo);
+			gf100_grctx_patch_wr32(chan, o + 0xf0, bs);
 			bo += gs;
-			mmio_wr32(info, o + 0xe4, as);
-			mmio_wr32(info, o + 0xf8, ao);
+			gf100_grctx_patch_wr32(chan, o + 0xe4, as);
+			gf100_grctx_patch_wr32(chan, o + 0xf8, ao);
 			ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
-			mmio_wr32(info, u, bs);
+			gf100_grctx_patch_wr32(chan, u, bs);
 		}
 	}
 
-	mmio_wr32(info, 0x4181e4, 0x00000100);
-	mmio_wr32(info, 0x41befc, 0x00000100);
+	gf100_grctx_patch_wr32(chan, 0x4181e4, 0x00000100);
+	gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000100);
+}
+
+u32
+gp102_grctx_generate_attrib_cb_size(struct gf100_gr *gr)
+{
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+	u32 size = grctx->alpha_nr_max * gr->tpc_total;
+	int gpc;
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++)
+		size += grctx->gfxp_nr * gr->func->ppc_nr * gr->ppc_tpc_max;
+
+	return ((size * 0x20) + 127) & ~127;
 }
 
 const struct gf100_grctx_func
@@ -101,6 +105,8 @@ gp102_grctx = {
 	.bundle_token_limit = 0x900,
 	.pagepool = gp100_grctx_generate_pagepool,
 	.pagepool_size = 0x20000,
+	.attrib_cb_size = gp102_grctx_generate_attrib_cb_size,
+	.attrib_cb = gp100_grctx_generate_attrib_cb,
 	.attrib = gp102_grctx_generate_attrib,
 	.attrib_nr_max = 0x4b0,
 	.attrib_nr = 0x320,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c
index 3b85e3d..90b5f79 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c
@@ -31,6 +31,8 @@ gp104_grctx = {
 	.bundle_token_limit = 0x900,
 	.pagepool = gp100_grctx_generate_pagepool,
 	.pagepool_size = 0x20000,
+	.attrib_cb_size = gp102_grctx_generate_attrib_cb_size,
+	.attrib_cb = gp100_grctx_generate_attrib_cb,
 	.attrib = gp102_grctx_generate_attrib,
 	.attrib_nr_max = 0x4b0,
 	.attrib_nr = 0x320,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c
index 5060c5e..d191761 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c
@@ -39,6 +39,8 @@ gp107_grctx = {
 	.bundle_token_limit = 0x300,
 	.pagepool = gp100_grctx_generate_pagepool,
 	.pagepool_size = 0x20000,
+	.attrib_cb_size = gp102_grctx_generate_attrib_cb_size,
+	.attrib_cb = gp100_grctx_generate_attrib_cb,
 	.attrib = gp102_grctx_generate_attrib,
 	.attrib_nr_max = 0x15de,
 	.attrib_nr = 0x540,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c
index 39553d5..c2e5d43 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c
@@ -59,31 +59,20 @@ gv100_grctx_pack_sw_veid_bundle_init[] = {
 };
 
 void
-gv100_grctx_generate_attrib(struct gf100_grctx *info)
+gv100_grctx_generate_attrib(struct gf100_gr_chan *chan)
 {
-	struct gf100_gr *gr = info->gr;
+	struct gf100_gr *gr = chan->gr;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	const u32  alpha = grctx->alpha_nr;
 	const u32 attrib = grctx->attrib_nr;
 	const u32   gfxp = grctx->gfxp_nr;
-	const int s = 12;
 	u32 size = grctx->alpha_nr_max * gr->tpc_total;
 	u32 ao = 0;
 	u32 bo = ao + size;
-	int gpc, ppc, b, n = 0;
+	int gpc, ppc, n = 0;
 
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++)
-		size += grctx->gfxp_nr * gr->ppc_nr[gpc] * gr->ppc_tpc_max;
-	size = ((size * 0x20) + 127) & ~127;
-	b = mmio_vram(info, size, (1 << s), false);
-
-	mmio_refn(info, 0x418810, 0x80000000, s, b);
-	mmio_refn(info, 0x419848, 0x10000000, s, b);
-	mmio_refn(info, 0x419c2c, 0x10000000, s, b);
-	mmio_refn(info, 0x419e00, 0x00000000, s, b);
-	mmio_wr32(info, 0x419e04, 0x80000000 | size >> 7);
-	mmio_wr32(info, 0x405830, attrib);
-	mmio_wr32(info, 0x40585c, alpha);
+	gf100_grctx_patch_wr32(chan, 0x405830, attrib);
+	gf100_grctx_patch_wr32(chan, 0x40585c, alpha);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
@@ -92,21 +81,32 @@ gv100_grctx_generate_attrib(struct gf100_grctx *info)
 			const u32 gs =   gfxp * gr->ppc_tpc_max;
 			const u32 u = 0x418ea0 + (n * 0x04);
 			const u32 o = PPC_UNIT(gpc, ppc, 0);
+
 			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
 				continue;
-			mmio_wr32(info, o + 0xc0, gs);
-			mmio_wr32(info, o + 0xf4, bo);
-			mmio_wr32(info, o + 0xf0, bs);
+
+			gf100_grctx_patch_wr32(chan, o + 0xc0, gs);
+			gf100_grctx_patch_wr32(chan, o + 0xf4, bo);
+			gf100_grctx_patch_wr32(chan, o + 0xf0, bs);
 			bo += gs;
-			mmio_wr32(info, o + 0xe4, as);
-			mmio_wr32(info, o + 0xf8, ao);
+			gf100_grctx_patch_wr32(chan, o + 0xe4, as);
+			gf100_grctx_patch_wr32(chan, o + 0xf8, ao);
 			ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
-			mmio_wr32(info, u, bs);
+			gf100_grctx_patch_wr32(chan, u, bs);
 		}
 	}
 
-	mmio_wr32(info, 0x4181e4, 0x00000100);
-	mmio_wr32(info, 0x41befc, 0x00000100);
+	gf100_grctx_patch_wr32(chan, 0x4181e4, 0x00000100);
+	gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000100);
+}
+
+void
+gv100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size)
+{
+	gm107_grctx_generate_attrib_cb(chan, addr, size);
+
+	gf100_grctx_patch_wr32(chan, 0x419e00, 0x00000000 | addr >> 12);
+	gf100_grctx_patch_wr32(chan, 0x419e04, 0x80000000 | size >> 7);
 }
 
 void
@@ -198,6 +198,8 @@ gv100_grctx = {
 	.bundle_token_limit = 0x1680,
 	.pagepool = gp100_grctx_generate_pagepool,
 	.pagepool_size = 0x20000,
+	.attrib_cb_size = gp102_grctx_generate_attrib_cb_size,
+	.attrib_cb = gv100_grctx_generate_attrib_cb,
 	.attrib = gv100_grctx_generate_attrib,
 	.attrib_nr_max = 0x6c0,
 	.attrib_nr = 0x480,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c
index 1abad38..1a151e8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c
@@ -74,6 +74,8 @@ tu102_grctx = {
 	.bundle_token_limit = 0xa80,
 	.pagepool = gp100_grctx_generate_pagepool,
 	.pagepool_size = 0x20000,
+	.attrib_cb_size = gp102_grctx_generate_attrib_cb_size,
+	.attrib_cb = gv100_grctx_generate_attrib_cb,
 	.attrib = gv100_grctx_generate_attrib,
 	.attrib_nr_max = 0x800,
 	.attrib_nr = 0x700,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index 5d1e45b..09141b1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -355,16 +355,11 @@ static void *
 gf100_gr_chan_dtor(struct nvkm_object *object)
 {
 	struct gf100_gr_chan *chan = gf100_gr_chan(object);
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(chan->data); i++) {
-		nvkm_vmm_put(chan->vmm, &chan->data[i].vma);
-		nvkm_memory_unref(&chan->data[i].mem);
-	}
 
 	nvkm_vmm_put(chan->vmm, &chan->mmio_vma);
 	nvkm_memory_unref(&chan->mmio);
 
+	nvkm_vmm_put(chan->vmm, &chan->attrib_cb);
 	nvkm_vmm_put(chan->vmm, &chan->unknown);
 	nvkm_vmm_put(chan->vmm, &chan->bundle_cb);
 	nvkm_vmm_put(chan->vmm, &chan->pagepool);
@@ -384,12 +379,10 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
 		  struct nvkm_object **pobject)
 {
 	struct gf100_gr *gr = gf100_gr(base);
-	struct gf100_gr_data *data = gr->mmio_data;
-	struct gf100_gr_mmio *mmio = gr->mmio_list;
 	struct gf100_gr_chan *chan;
 	struct gf100_vmm_map_v0 args = { .priv = 1 };
 	struct nvkm_device *device = gr->base.engine.subdev.device;
-	int ret, i;
+	int ret;
 
 	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
 		return -ENOMEM;
@@ -416,6 +409,22 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
 	if (ret)
 		return ret;
 
+	/* Map attribute circular buffer. */
+	ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->attrib_cb), &chan->attrib_cb);
+	if (ret)
+		return ret;
+
+	if (device->card_type < GP100) {
+		ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb, NULL, 0);
+		if (ret)
+			return ret;
+	} else {
+		ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb,
+				      &args, sizeof(args));;
+		if (ret)
+			return ret;
+	}
+
 	/* Map some context buffer of unknown purpose. */
 	if (gr->func->grctx->unknown_size) {
 		ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->unknown), &chan->unknown);
@@ -457,47 +466,12 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
 	if (ret)
 		return ret;
 
-	/* allocate buffers referenced by mmio list */
-	for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) {
-		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
-				      data->size, data->align, false,
-				      &chan->data[i].mem);
-		if (ret)
-			return ret;
-
-		ret = nvkm_vmm_get(fifoch->vmm, 12,
-				   nvkm_memory_size(chan->data[i].mem),
-				   &chan->data[i].vma);
-		if (ret)
-			return ret;
-
-		args.priv = data->priv;
-
-		ret = nvkm_memory_map(chan->data[i].mem, 0, chan->vmm,
-				      chan->data[i].vma, &args, sizeof(args));
-		if (ret)
-			return ret;
-
-		data++;
-	}
-
 	/* finally, fill in the mmio list and point the context at it */
 	nvkm_kmap(chan->mmio);
 	gr->func->grctx->pagepool(chan, chan->pagepool->addr);
 	gr->func->grctx->bundle(chan, chan->bundle_cb->addr, gr->func->grctx->bundle_size);
-	for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) {
-		u32 addr = mmio->addr;
-		u32 data = mmio->data;
-
-		if (mmio->buffer >= 0) {
-			u64 info = chan->data[mmio->buffer].vma->addr;
-			data |= info >> mmio->shift;
-		}
-
-		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr);
-		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
-		mmio++;
-	}
+	gr->func->grctx->attrib_cb(chan, chan->attrib_cb->addr, gr->func->grctx->attrib_cb_size(gr));
+	gr->func->grctx->attrib(chan);
 	if (gr->func->grctx->patch_ltc)
 		gr->func->grctx->patch_ltc(chan);
 	if (gr->func->grctx->unknown_size)
@@ -2015,6 +1989,11 @@ gf100_gr_oneinit(struct nvkm_gr *base)
 	if (ret)
 		return ret;
 
+	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->attrib_cb_size(gr),
+			      0x1000, false, &gr->attrib_cb);
+	if (ret)
+		return ret;
+
 	if (gr->func->grctx->unknown_size) {
 		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->unknown_size,
 				      0x100, false, &gr->unknown);
@@ -2092,6 +2071,7 @@ gf100_gr_dtor(struct nvkm_gr *base)
 	kfree(gr->data);
 
 	nvkm_memory_unref(&gr->unknown);
+	nvkm_memory_unref(&gr->attrib_cb);
 	nvkm_memory_unref(&gr->bundle_cb);
 	nvkm_memory_unref(&gr->pagepool);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
index ae3eee65..7f467f6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -44,19 +44,6 @@ struct nvkm_acr_lsfw;
 #define PPC_UNIT(t, m, r) (0x503000 + (t) * 0x8000 + (m) * 0x200 + (r))
 #define TPC_UNIT(t, m, r) (0x504000 + (t) * 0x8000 + (m) * 0x800 + (r))
 
-struct gf100_gr_data {
-	u32 size;
-	u32 align;
-	bool priv;
-};
-
-struct gf100_gr_mmio {
-	u32 addr;
-	u32 data;
-	u32 shift;
-	int buffer;
-};
-
 struct gf100_gr_zbc_color {
 	u32 format;
 	u32 ds[4];
@@ -123,6 +110,7 @@ struct gf100_gr {
 
 	struct nvkm_memory *pagepool;
 	struct nvkm_memory *bundle_cb;
+	struct nvkm_memory *attrib_cb;
 	struct nvkm_memory *unknown;
 
 	u8 screen_tile_row_offset;
@@ -134,8 +122,6 @@ struct gf100_gr {
 	} sm[TPC_MAX];
 	u8 sm_nr;
 
-	struct gf100_gr_data mmio_data[4];
-	struct gf100_gr_mmio mmio_list[4096/8];
 	u32  size;
 	u32 *data;
 	u32 size_zcull;
@@ -264,16 +250,12 @@ struct gf100_gr_chan {
 
 	struct nvkm_vma *pagepool;
 	struct nvkm_vma *bundle_cb;
+	struct nvkm_vma *attrib_cb;
 	struct nvkm_vma *unknown;
 
 	struct nvkm_memory *mmio;
 	struct nvkm_vma *mmio_vma;
 	int mmio_nr;
-
-	struct {
-		struct nvkm_memory *mem;
-		struct nvkm_vma *vma;
-	} data[4];
 };
 
 void gf100_gr_ctxctl_debug(struct gf100_gr *);
-- 
2.7.4