From fc36076441bae141893bd79899d19aa1b5fdf524 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 8 May 2018 20:39:46 +1000 Subject: [PATCH] drm/nouveau/gr/gf100-: virtualise tpc_mask + apply fixes from traces We weren't placing higher TPC IDs in the right place on some configurations. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c | 4 ++++ drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h | 12 ++++++------ drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c | 22 +++++++++++++--------- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c | 2 +- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c | 22 ++++++++-------------- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c | 2 ++ drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c | 2 ++ drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h | 2 ++ drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c | 1 + drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c | 2 ++ drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c | 2 ++ drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c | 2 ++ drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c | 2 ++ 13 files changed, 47 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c index 3793d48..a52f27f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -1368,6 +1368,10 @@ gf100_grctx_generate_floorsweep(struct gf100_gr *gr) func->gpc_tpc_nr(gr); if (func->r419f78) func->r419f78(gr); + if (func->tpc_mask) + func->tpc_mask(gr); + if (func->smid_config) + func->smid_config(gr); } void diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h index e84b46f..d319e76 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h @@ -61,6 +61,8 @@ struct gf100_grctx_func { void (*r406500)(struct gf100_gr *); void (*gpc_tpc_nr)(struct gf100_gr *); void (*r419f78)(struct gf100_gr *); + void (*tpc_mask)(struct gf100_gr *); + void (*smid_config)(struct gf100_gr *); }; extern const struct gf100_grctx_func gf100_grctx; @@ -103,11 +105,6 @@ void gk104_grctx_generate_pagepool(struct gf100_grctx *); void gk104_grctx_generate_patch_ltc(struct gf100_grctx *); void gk104_grctx_generate_unkn(struct gf100_gr *); -void gm107_grctx_generate_bundle(struct gf100_grctx *); -void gm107_grctx_generate_pagepool(struct gf100_grctx *); -void gm107_grctx_generate_attrib(struct gf100_grctx *); -void gm107_grctx_generate_sm_id(struct gf100_gr *, int, int, int); - extern const struct gf100_grctx_func gk110_grctx; extern const struct gf100_grctx_func gk110b_grctx; extern const struct gf100_grctx_func gk208_grctx; @@ -116,17 +113,20 @@ extern const struct gf100_grctx_func gm107_grctx; void gm107_grctx_generate_bundle(struct gf100_grctx *); void gm107_grctx_generate_pagepool(struct gf100_grctx *); void gm107_grctx_generate_attrib(struct gf100_grctx *); +void gm107_grctx_generate_sm_id(struct gf100_gr *, int, int, int); extern const struct gf100_grctx_func gm200_grctx; void gm200_grctx_generate_dist_skip_table(struct gf100_gr *); void gm200_grctx_generate_r406500(struct gf100_gr *); -void gm200_grctx_generate_405b60(struct gf100_gr *); +void gm200_grctx_generate_tpc_mask(struct gf100_gr *); +void gm200_grctx_generate_smid_config(struct gf100_gr *); extern const struct gf100_grctx_func gm20b_grctx; extern const struct gf100_grctx_func gp100_grctx; void gp100_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *); void gp100_grctx_generate_pagepool(struct gf100_grctx *); +void gp100_grctx_generate_smid_config(struct gf100_gr *); extern const struct gf100_grctx_func gp102_grctx; void gp102_grctx_generate_attrib(struct gf100_grctx *); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c index 3ba5e95..f1e87b9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c @@ -28,7 +28,7 @@ ******************************************************************************/ void -gm200_grctx_generate_405b60(struct gf100_gr *gr) +gm200_grctx_generate_smid_config(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4); @@ -60,6 +60,15 @@ gm200_grctx_generate_405b60(struct gf100_gr *gr) } void +gm200_grctx_generate_tpc_mask(struct gf100_gr *gr) +{ + u32 tmp, i; + for (tmp = 0, i = 0; i < gr->gpc_nr; i++) + tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * gr->func->tpc_nr); + nvkm_wr32(gr->base.engine.subdev.device, 0x4041c4, tmp); +} + +void gm200_grctx_generate_r406500(struct gf100_gr *gr) { nvkm_wr32(gr->base.engine.subdev.device, 0x406500, 0x00000000); @@ -70,8 +79,7 @@ gm200_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) { struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_grctx_func *grctx = gr->func->grctx; - u32 idle_timeout, tmp; - int i; + u32 idle_timeout; gf100_gr_mmio(gr, gr->fuc_sw_ctx); @@ -84,12 +92,6 @@ gm200_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_grctx_generate_floorsweep(gr); - for (tmp = 0, i = 0; i < gr->gpc_nr; i++) - tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4); - nvkm_wr32(device, 0x4041c4, tmp); - - gm200_grctx_generate_405b60(gr); - gf100_gr_icmd(gr, gr->fuc_bundle); nvkm_wr32(device, 0x404154, idle_timeout); gf100_gr_mthd(gr, gr->fuc_method); @@ -140,4 +142,6 @@ gm200_grctx = { .dist_skip_table = gm200_grctx_generate_dist_skip_table, .r406500 = gm200_grctx_generate_r406500, .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .tpc_mask = gm200_grctx_generate_tpc_mask, + .smid_config = gm200_grctx_generate_smid_config, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c index 3dd4e18..a1d9e11 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c @@ -52,7 +52,7 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4); nvkm_wr32(device, 0x4041c4, tmp); - gm200_grctx_generate_405b60(gr); + gm200_grctx_generate_smid_config(gr); gf100_gr_wait_idle(gr); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c index c48617b..821219a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c @@ -89,13 +89,12 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info) mmio_wr32(info, 0x41befc, 0x00000000); } -static void -gp100_grctx_generate_405b60(struct gf100_gr *gr) +void +gp100_grctx_generate_smid_config(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4); - u32 dist[TPC_MAX / 4] = {}; - u32 gpcs[GPC_MAX * 2] = {}; + u32 dist[TPC_MAX / 4] = {}, gpcs[16] = {}; u8 tpcnr[GPC_MAX]; int tpc, gpc, i; @@ -112,12 +111,12 @@ gp100_grctx_generate_405b60(struct gf100_gr *gr) tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8); - gpcs[gpc + (gr->gpc_nr * (tpc / 4))] |= i << (tpc * 8); + gpcs[gpc + (gr->func->gpc_nr * (tpc / 4))] |= i << (tpc * 8); } for (i = 0; i < dist_nr; i++) nvkm_wr32(device, 0x405b60 + (i * 4), dist[i]); - for (i = 0; i < gr->gpc_nr * 2; i++) + for (i = 0; i < ARRAY_SIZE(gpcs); i++) nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]); } @@ -126,8 +125,7 @@ gp100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) { struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_grctx_func *grctx = gr->func->grctx; - u32 idle_timeout, tmp; - int i; + u32 idle_timeout; gf100_gr_mmio(gr, gr->fuc_sw_ctx); @@ -140,12 +138,6 @@ gp100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_grctx_generate_floorsweep(gr); - for (tmp = 0, i = 0; i < gr->gpc_nr; i++) - tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 5); - nvkm_wr32(device, 0x4041c4, tmp); - - gp100_grctx_generate_405b60(gr); - gf100_gr_icmd(gr, gr->fuc_bundle); nvkm_wr32(device, 0x404154, idle_timeout); gf100_gr_mthd(gr, gr->fuc_method); @@ -171,4 +163,6 @@ gp100_grctx = { .dist_skip_table = gm200_grctx_generate_dist_skip_table, .r406500 = gm200_grctx_generate_r406500, .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .tpc_mask = gm200_grctx_generate_tpc_mask, + .smid_config = gp100_grctx_generate_smid_config, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c index ec4fbe8..611819f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c @@ -99,4 +99,6 @@ gp102_grctx = { .dist_skip_table = gm200_grctx_generate_dist_skip_table, .r406500 = gm200_grctx_generate_r406500, .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .tpc_mask = gm200_grctx_generate_tpc_mask, + .smid_config = gp100_grctx_generate_smid_config, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c index 84c98cd..d908317 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c @@ -49,4 +49,6 @@ gp107_grctx = { .dist_skip_table = gm200_grctx_generate_dist_skip_table, .r406500 = gm200_grctx_generate_r406500, .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .tpc_mask = gm200_grctx_generate_tpc_mask, + .smid_config = gp100_grctx_generate_smid_config, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index c2a1b2a..31109ce 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -154,6 +154,8 @@ struct gf100_gr_func { struct gf100_gr_ucode *ucode; } gpccs; int (*rops)(struct gf100_gr *); + int gpc_nr; + int tpc_nr; int ppc_nr; const struct gf100_grctx_func *grctx; const struct nvkm_therm_clkgate_pack *clkgate_pack; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c index 4dcb56b..ae0eaf8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c @@ -134,6 +134,7 @@ gm200_gr = { .init_shader_exceptions = gm107_gr_init_shader_exceptions, .init_400054 = gm107_gr_init_400054, .rops = gm200_gr_rops, + .tpc_nr = 4, .ppc_nr = 2, .grctx = &gm200_grctx, .sclass = { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c index cc507e8..3addbc1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c @@ -80,6 +80,8 @@ gp100_gr = { .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gp100_gr_init_shader_exceptions, .rops = gm200_gr_rops, + .gpc_nr = 6, + .tpc_nr = 5, .ppc_nr = 2, .grctx = &gp100_grctx, .sclass = { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c index 86d1ff7..ea99c15 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c @@ -58,6 +58,8 @@ gp102_gr = { .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gp100_gr_init_shader_exceptions, .rops = gm200_gr_rops, + .gpc_nr = 6, + .tpc_nr = 5, .ppc_nr = 3, .grctx = &gp102_grctx, .sclass = { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c index 14007b5..09cba53 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c @@ -44,6 +44,8 @@ gp107_gr = { .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gp100_gr_init_shader_exceptions, .rops = gm200_gr_rops, + .gpc_nr = 2, + .tpc_nr = 3, .ppc_nr = 1, .grctx = &gp107_grctx, .sclass = { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c index 450a96d..4972bf8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c @@ -42,6 +42,8 @@ gp10b_gr = { .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gp100_gr_init_shader_exceptions, .rops = gm200_gr_rops, + .gpc_nr = 1, + .tpc_nr = 2, .ppc_nr = 1, .grctx = &gp102_grctx, .sclass = { -- 2.7.4