From 60770fa28bd7d69097d3a186fe8cfa1ec21c9c1d Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 8 May 2018 20:39:46 +1000 Subject: [PATCH] drm/nouveau/gr/gf100-: virtualise dist_skip_table + improve algorithm The algorithm for GM200 and newer matches RM for all the boards I have, but I don't have enough data to try and figure something out for earlier boards, so these will still write zeroes to the table as we did before. The code in NVGPU isn't helpful here, it appears to handle specific cases. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c | 2 ++ drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h | 3 +++ drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c | 15 +++++++++---- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c | 5 +---- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c | 1 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c | 1 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c | 1 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c | 5 +---- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c | 25 ++++++++++++++++++++-- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c | 3 +-- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c | 1 + drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c | 1 + drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 3 +++ drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h | 1 + 14 files changed, 51 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c index cdf74f3..176be71 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -1360,6 +1360,8 @@ gf100_grctx_generate_floorsweep(struct gf100_gr *gr) func->alpha_beta_tables(gr); if (func->max_ways_evict) func->max_ways_evict(gr); + if (func->dist_skip_table) + func->dist_skip_table(gr); } void diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h index 41cb8754..dd1c73b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h @@ -57,6 +57,7 @@ struct gf100_grctx_func { void (*rop_mapping)(struct gf100_gr *); void (*alpha_beta_tables)(struct gf100_gr *); void (*max_ways_evict)(struct gf100_gr *); + void (*dist_skip_table)(struct gf100_gr *); }; extern const struct gf100_grctx_func gf100_grctx; @@ -84,6 +85,7 @@ extern const struct gf100_grctx_func gf110_grctx; extern const struct gf100_grctx_func gf117_grctx; void gf117_grctx_generate_attrib(struct gf100_grctx *); void gf117_grctx_generate_rop_mapping(struct gf100_gr *); +void gf117_grctx_generate_dist_skip_table(struct gf100_gr *); extern const struct gf100_grctx_func gf119_grctx; @@ -112,6 +114,7 @@ void gm107_grctx_generate_pagepool(struct gf100_grctx *); void gm107_grctx_generate_attrib(struct gf100_grctx *); extern const struct gf100_grctx_func gm200_grctx; +void gm200_grctx_generate_dist_skip_table(struct gf100_gr *); void gm200_grctx_generate_405b60(struct gf100_gr *); extern const struct gf100_grctx_func gm20b_grctx; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c index 423b097..b3f4127 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c @@ -180,6 +180,16 @@ gf117_grctx_pack_ppc[] = { ******************************************************************************/ void +gf117_grctx_generate_dist_skip_table(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + int i; + + for (i = 0; i < 8; i++) + nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); +} + +void gf117_grctx_generate_rop_mapping(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -282,7 +292,6 @@ gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_grctx_func *grctx = gr->func->grctx; u32 idle_timeout; - int i; nvkm_mc_unk260(device, 0); @@ -301,9 +310,6 @@ gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_grctx_generate_floorsweep(gr); - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); - gf100_gr_icmd(gr, grctx->icmd); nvkm_wr32(device, 0x404154, idle_timeout); gf100_gr_mthd(gr, grctx->mthd); @@ -336,4 +342,5 @@ gf117_grctx = { .rop_mapping = gf117_grctx_generate_rop_mapping, .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables, .max_ways_evict = gf100_grctx_generate_max_ways_evict, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c index 25576c1..1216931 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c @@ -898,7 +898,6 @@ gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_grctx_func *grctx = gr->func->grctx; u32 idle_timeout; - int i; nvkm_mc_unk260(device, 0); @@ -917,9 +916,6 @@ gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_grctx_generate_floorsweep(gr); - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); - nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000); @@ -1006,4 +1002,5 @@ gk104_grctx = { .tpc_nr = gf100_grctx_generate_tpc_nr, .rop_mapping = gf117_grctx_generate_rop_mapping, .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c index 284570a..e6a54dc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c @@ -835,4 +835,5 @@ gk110_grctx = { .tpc_nr = gf100_grctx_generate_tpc_nr, .rop_mapping = gf117_grctx_generate_rop_mapping, .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c index ffd8cf9..ef82ebe 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c @@ -96,4 +96,5 @@ gk110b_grctx = { .tpc_nr = gf100_grctx_generate_tpc_nr, .rop_mapping = gf117_grctx_generate_rop_mapping, .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c index e5e4d4dc..226f8aa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c @@ -557,4 +557,5 @@ gk208_grctx = { .tpc_nr = gf100_grctx_generate_tpc_nr, .rop_mapping = gf117_grctx_generate_rop_mapping, .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c index c209bf3..cdf9d60 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c @@ -945,7 +945,6 @@ gm107_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_grctx_func *grctx = gr->func->grctx; u32 idle_timeout; - int i; gf100_gr_mmio(gr, grctx->hub); gf100_gr_mmio(gr, grctx->gpc); @@ -962,9 +961,6 @@ gm107_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_grctx_generate_floorsweep(gr); - nvkm_wr32(device, 0x4064d0, 0x00000001); - for (i = 1; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); nvkm_wr32(device, 0x406500, 0x00000001); nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); @@ -1005,4 +1001,5 @@ gm107_grctx = { .tpc_nr = gf100_grctx_generate_tpc_nr, .rop_mapping = gf117_grctx_generate_rop_mapping, .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c index cfccd75..6891206 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c @@ -78,8 +78,6 @@ gm200_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_grctx_generate_floorsweep(gr); - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); nvkm_wr32(device, 0x406500, 0x00000000); nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); @@ -98,6 +96,28 @@ gm200_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) nvkm_mask(device, 0x418e4c, 0xffffffff, 0x70000000); } +void +gm200_grctx_generate_dist_skip_table(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + u32 data[8] = {}; + int gpc, ppc, i; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) { + u8 ppc_tpcs = gr->ppc_tpc_nr[gpc][ppc]; + u8 ppc_tpcm = gr->ppc_tpc_mask[gpc][ppc]; + while (ppc_tpcs-- > gr->ppc_tpc_min) + ppc_tpcm &= ppc_tpcm - 1; + ppc_tpcm ^= gr->ppc_tpc_mask[gpc][ppc]; + ((u8 *)data)[gpc] |= ppc_tpcm; + } + } + + for (i = 0; i < ARRAY_SIZE(data); i++) + nvkm_wr32(device, 0x4064d0 + (i * 0x04), data[i]); +} + const struct gf100_grctx_func gm200_grctx = { .main = gm200_grctx_generate_main, @@ -115,4 +135,5 @@ gm200_grctx = { .alpha_nr = 0x1000, .sm_id = gm107_grctx_generate_sm_id, .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c index e099907..1a3d0c5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c @@ -140,8 +140,6 @@ gp100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_grctx_generate_floorsweep(gr); - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); nvkm_wr32(device, 0x406500, 0x00000000); nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); @@ -174,4 +172,5 @@ gp100_grctx = { .alpha_nr = 0x800, .sm_id = gm107_grctx_generate_sm_id, .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c index 553a609..2aeabb3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c @@ -96,4 +96,5 @@ gp102_grctx = { .alpha_nr = 0x800, .sm_id = gm107_grctx_generate_sm_id, .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c index db3fff8..4aea2f6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c @@ -46,4 +46,5 @@ gp107_grctx = { .alpha_nr = 0x800, .sm_id = gm107_grctx_generate_sm_id, .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index fe3b44d..dd4a410 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -1685,6 +1685,9 @@ gf100_gr_oneinit(struct nvkm_gr *base) continue; gr->ppc_mask[i] |= (1 << j); gr->ppc_tpc_nr[i][j] = hweight8(gr->ppc_tpc_mask[i][j]); + if (gr->ppc_tpc_min == 0 || + gr->ppc_tpc_min > gr->ppc_tpc_nr[i][j]) + gr->ppc_tpc_min = gr->ppc_tpc_nr[i][j]; } } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index 6f7a786..c2a1b2a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -105,6 +105,7 @@ struct gf100_gr { u8 ppc_mask[GPC_MAX]; u8 ppc_tpc_mask[GPC_MAX][4]; u8 ppc_tpc_nr[GPC_MAX][4]; + u8 ppc_tpc_min; struct gf100_gr_data mmio_data[4]; struct gf100_gr_mmio mmio_list[4096/8]; -- 2.7.4