nvkm_wr32(device, 0x40780c + (i * 4), data[i]);
}
+static const u32
+gf100_grctx_alpha_beta_map[17][32] = {
+ [1] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ },
+ [2] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ },
+ //XXX: 3
+ [4] = {
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ },
+ //XXX: 5
+ //XXX: 6
+ [7] = {
+ 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6,
+ },
+ [8] = {
+ 1, 1, 1,
+ 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6,
+ 7, 7, 7,
+ },
+ //XXX: 9
+ //XXX: 10
+ [11] = {
+ 1, 1,
+ 2, 2, 2, 2,
+ 3, 3, 3,
+ 4, 4, 4, 4,
+ 5, 5, 5,
+ 6, 6, 6,
+ 7, 7, 7, 7,
+ 8, 8, 8,
+ 9, 9, 9, 9,
+ 10, 10,
+ },
+ //XXX: 12
+ //XXX: 13
+ [14] = {
+ 1, 1,
+ 2, 2,
+ 3, 3, 3,
+ 4, 4, 4,
+ 5, 5,
+ 6, 6, 6,
+ 7, 7,
+ 8, 8, 8,
+ 9, 9,
+ 10, 10, 10,
+ 11, 11, 11,
+ 12, 12,
+ 13, 13,
+ },
+ [15] = {
+ 1, 1,
+ 2, 2,
+ 3, 3,
+ 4, 4, 4,
+ 5, 5,
+ 6, 6, 6,
+ 7, 7,
+ 8, 8,
+ 9, 9, 9,
+ 10, 10,
+ 11, 11, 11,
+ 12, 12,
+ 13, 13,
+ 14, 14,
+ },
+ [16] = {
+ 1, 1,
+ 2, 2,
+ 3, 3,
+ 4, 4,
+ 5, 5,
+ 6, 6, 6,
+ 7, 7,
+ 8, 8,
+ 9, 9,
+ 10, 10, 10,
+ 11, 11,
+ 12, 12,
+ 13, 13,
+ 14, 14,
+ 15, 15,
+ },
+};
+
void
-gf100_grctx_generate_r406800(struct gf100_gr *gr)
+gf100_grctx_generate_alpha_beta_tables(struct gf100_gr *gr)
{
- struct nvkm_device *device = gr->base.engine.subdev.device;
- u64 tpc_mask = 0, tpc_set = 0;
- u8 tpcnr[GPC_MAX];
- int gpc, tpc;
- int i, a, b;
+ struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+ struct nvkm_device *device = subdev->device;
+ int i, gpc;
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
- for (gpc = 0; gpc < gr->gpc_nr; gpc++)
- tpc_mask |= ((1ULL << gr->tpc_nr[gpc]) - 1) << (gpc * 8);
-
- for (i = 0, gpc = -1, b = -1; i < 32; i++) {
- a = (i * (gr->tpc_total - 1)) / 32;
- if (a != b) {
- b = a;
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
- tpc_set |= 1ULL << ((gpc * 8) + tpc);
+ for (i = 0; i < 32; i++) {
+ u32 atarget = gf100_grctx_alpha_beta_map[gr->tpc_total][i];
+ u32 abits[GPC_MAX] = {}, amask = 0, bmask = 0;
+
+ if (!atarget) {
+ nvkm_warn(subdev, "missing alpha/beta mapping table\n");
+ atarget = max_t(u32, gr->tpc_total * i / 32, 1);
}
- nvkm_wr32(device, 0x406800 + (i * 0x20), lower_32_bits(tpc_set));
- nvkm_wr32(device, 0x406c00 + (i * 0x20), lower_32_bits(tpc_set ^ tpc_mask));
- if (gr->gpc_nr > 4) {
- nvkm_wr32(device, 0x406804 + (i * 0x20), upper_32_bits(tpc_set));
- nvkm_wr32(device, 0x406c04 + (i * 0x20), upper_32_bits(tpc_set ^ tpc_mask));
+ while (atarget) {
+ for (gpc = 0; atarget && gpc < gr->gpc_nr; gpc++) {
+ if (abits[gpc] < gr->tpc_nr[gpc]) {
+ abits[gpc]++;
+ atarget--;
+ }
+ }
}
+
+ for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+ u32 bbits = gr->tpc_nr[gpc] - abits[gpc];
+ amask |= ((1 << abits[gpc]) - 1) << (gpc * 8);
+ bmask |= ((1 << bbits) - 1) << abits[gpc] << (gpc * 8);
+ }
+
+ nvkm_wr32(device, 0x406800 + (i * 0x20), amask);
+ nvkm_wr32(device, 0x406c00 + (i * 0x20), bmask);
}
}
func->r4060a8(gr);
func->rop_mapping(gr);
+
+ if (func->alpha_beta_tables)
+ func->alpha_beta_tables(gr);
}
void
grctx->unkn(gr);
gf100_grctx_generate_floorsweep(gr);
- gf100_grctx_generate_r406800(gr);
gf100_gr_icmd(gr, grctx->icmd);
nvkm_wr32(device, 0x404154, idle_timeout);
.tpc_nr = gf100_grctx_generate_tpc_nr,
.r4060a8 = gf100_grctx_generate_r4060a8,
.rop_mapping = gf100_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
};
void (*tpc_nr)(struct gf100_gr *, int gpc);
void (*r4060a8)(struct gf100_gr *);
void (*rop_mapping)(struct gf100_gr *);
+ void (*alpha_beta_tables)(struct gf100_gr *);
};
extern const struct gf100_grctx_func gf100_grctx;
void gf100_grctx_generate_attrib(struct gf100_grctx *);
void gf100_grctx_generate_unkn(struct gf100_gr *);
void gf100_grctx_generate_floorsweep(struct gf100_gr *);
-void gf100_grctx_generate_r406800(struct gf100_gr *);
void gf100_grctx_generate_sm_id(struct gf100_gr *, int, int, int);
void gf100_grctx_generate_tpc_nr(struct gf100_gr *, int);
void gf100_grctx_generate_r4060a8(struct gf100_gr *);
void gf100_grctx_generate_rop_mapping(struct gf100_gr *);
+void gf100_grctx_generate_alpha_beta_tables(struct gf100_gr *);
extern const struct gf100_grctx_func gf108_grctx;
void gf108_grctx_generate_attrib(struct gf100_grctx *);
extern const struct gf100_grctx_func gf119_grctx;
extern const struct gf100_grctx_func gk104_grctx;
+void gk104_grctx_generate_alpha_beta_tables(struct gf100_gr *);
+
extern const struct gf100_grctx_func gk20a_grctx;
void gk104_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *);
void gk104_grctx_generate_bundle(struct gf100_grctx *);
.tpc_nr = gf100_grctx_generate_tpc_nr,
.r4060a8 = gf100_grctx_generate_r4060a8,
.rop_mapping = gf100_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
};
.tpc_nr = gf100_grctx_generate_tpc_nr,
.r4060a8 = gf100_grctx_generate_r4060a8,
.rop_mapping = gf100_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
};
.tpc_nr = gf100_grctx_generate_tpc_nr,
.r4060a8 = gf100_grctx_generate_r4060a8,
.rop_mapping = gf100_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
};
grctx->unkn(gr);
gf100_grctx_generate_floorsweep(gr);
- gf100_grctx_generate_r406800(gr);
for (i = 0; i < 8; i++)
nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
.tpc_nr = gf100_grctx_generate_tpc_nr,
.r4060a8 = gf100_grctx_generate_r4060a8,
.rop_mapping = gf117_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
};
.tpc_nr = gf100_grctx_generate_tpc_nr,
.r4060a8 = gf100_grctx_generate_r4060a8,
.rop_mapping = gf100_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
};
grctx->unkn(gr);
gf100_grctx_generate_floorsweep(gr);
- gf100_grctx_generate_r406800(gr);
for (i = 0; i < 8; i++)
nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
nvkm_mask(device, 0x41be10, 0x00800000, 0x00800000);
}
+void
+gk104_grctx_generate_alpha_beta_tables(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ int i, j, gpc, ppc;
+
+ for (i = 0; i < 32; i++) {
+ u32 atarget = max_t(u32, gr->tpc_total * i / 32, 1);
+ u32 btarget = gr->tpc_total - atarget;
+ bool alpha = atarget < btarget;
+ u64 amask = 0, bmask = 0;
+
+ for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+ for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) {
+ u32 ppc_tpcs = gr->ppc_tpc_nr[gpc][ppc];
+ u32 abits, bbits, pmask;
+
+ if (alpha) {
+ abits = atarget ? ppc_tpcs : 0;
+ bbits = ppc_tpcs - abits;
+ } else {
+ bbits = btarget ? ppc_tpcs : 0;
+ abits = ppc_tpcs - bbits;
+ }
+
+ pmask = gr->ppc_tpc_mask[gpc][ppc];
+ while (ppc_tpcs-- > abits)
+ pmask &= pmask - 1;
+ amask |= (u64)pmask << (gpc * 8);
+
+ pmask ^= gr->ppc_tpc_mask[gpc][ppc];
+ bmask |= (u64)pmask << (gpc * 8);
+
+ atarget -= min(abits, atarget);
+ btarget -= min(bbits, btarget);
+ if ((abits > 0) || (bbits > 0))
+ alpha = !alpha;
+ }
+ }
+
+ for (j = 0; j < gr->gpc_nr; j += 4, amask >>= 32, bmask >>= 32) {
+ nvkm_wr32(device, 0x406800 + (i * 0x20) + j, amask);
+ nvkm_wr32(device, 0x406c00 + (i * 0x20) + j, bmask);
+ }
+ }
+}
+
const struct gf100_grctx_func
gk104_grctx = {
.main = gk104_grctx_generate_main,
.sm_id = gf100_grctx_generate_sm_id,
.tpc_nr = gf100_grctx_generate_tpc_nr,
.rop_mapping = gf117_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
};
.sm_id = gf100_grctx_generate_sm_id,
.tpc_nr = gf100_grctx_generate_tpc_nr,
.rop_mapping = gf117_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
};
.sm_id = gf100_grctx_generate_sm_id,
.tpc_nr = gf100_grctx_generate_tpc_nr,
.rop_mapping = gf117_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
};
.sm_id = gf100_grctx_generate_sm_id,
.tpc_nr = gf100_grctx_generate_tpc_nr,
.rop_mapping = gf117_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
};
grctx->unkn(gr);
gf100_grctx_generate_floorsweep(gr);
- gf100_grctx_generate_r406800(gr);
for (i = 0; i < 8; i++)
nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
.sm_id = gf100_grctx_generate_sm_id,
.tpc_nr = gf100_grctx_generate_tpc_nr,
.rop_mapping = gf117_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
};
grctx->unkn(gr);
gf100_grctx_generate_floorsweep(gr);
- gf100_grctx_generate_r406800(gr);
nvkm_wr32(device, 0x4064d0, 0x00000001);
for (i = 1; i < 8; i++)
.sm_id = gm107_grctx_generate_sm_id,
.tpc_nr = gf100_grctx_generate_tpc_nr,
.rop_mapping = gf117_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
};
gr->tpc_total += gr->tpc_nr[i];
gr->ppc_nr[i] = gr->func->ppc_nr;
for (j = 0; j < gr->ppc_nr[i]; j++) {
- u8 mask = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
- if (mask)
- gr->ppc_mask[i] |= (1 << j);
- gr->ppc_tpc_nr[i][j] = hweight8(mask);
+ gr->ppc_tpc_mask[i][j] =
+ nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
+ if (gr->ppc_tpc_mask[i][j] == 0)
+ continue;
+ gr->ppc_mask[i] |= (1 << j);
+ gr->ppc_tpc_nr[i][j] = hweight8(gr->ppc_tpc_mask[i][j]);
}
}
u8 tpc_total;
u8 ppc_nr[GPC_MAX];
u8 ppc_mask[GPC_MAX];
+ u8 ppc_tpc_mask[GPC_MAX][4];
u8 ppc_tpc_nr[GPC_MAX][4];
struct gf100_gr_data mmio_data[4];