From ce188bb252c0d8cab8a2763e8365985df1a0902b Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 29 Apr 2013 10:56:36 +0800 Subject: [PATCH] ilo: move device limits to ilo_dev_info or to GPEs It seems a bit weird to have device limits in a context. --- src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c | 11 ++---- src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c | 8 ++-- src/gallium/drivers/ilo/ilo_common.h | 1 + src/gallium/drivers/ilo/ilo_context.c | 38 ------------------ src/gallium/drivers/ilo/ilo_context.h | 9 ----- src/gallium/drivers/ilo/ilo_gpe_gen6.c | 53 +++++++++++++++++++++++--- src/gallium/drivers/ilo/ilo_gpe_gen6.h | 6 +-- src/gallium/drivers/ilo/ilo_gpe_gen7.c | 52 +++++++++++++++---------- src/gallium/drivers/ilo/ilo_gpe_gen7.h | 5 +-- src/gallium/drivers/ilo/ilo_screen.c | 40 ++++++++++++++++--- 10 files changed, 127 insertions(+), 96 deletions(-) diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c index 490883a..ae99e3e 100644 --- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c @@ -299,7 +299,7 @@ gen6_pipeline_common_urb(struct ilo_3d_pipeline *p, /* in bytes */ vs_entry_size *= sizeof(float) * 4; gs_entry_size *= sizeof(float) * 4; - vs_total_size = ilo->urb.size * 1024; + vs_total_size = ilo->dev->urb_size; if (gs_active) { vs_total_size /= 2; @@ -480,8 +480,7 @@ gen6_pipeline_vs(struct ilo_3d_pipeline *p, const struct ilo_shader *vs = (ilo->vs)? ilo->vs->shader : NULL; const int num_samplers = ilo->samplers[PIPE_SHADER_VERTEX].num_samplers; - p->gen6_3DSTATE_VS(p->dev, - vs, ilo->max_vs_threads, num_samplers, p->cp); + p->gen6_3DSTATE_VS(p->dev, vs, num_samplers, p->cp); } if (emit_3dstate_constant_vs && p->dev->gen == ILO_GEN(6)) @@ -506,8 +505,7 @@ gen6_pipeline_gs(struct ilo_3d_pipeline *p, if (gs) assert(!gs->pcb.clip_state_size); - p->gen6_3DSTATE_GS(p->dev, - gs, ilo->max_gs_threads, vs, + p->gen6_3DSTATE_GS(p->dev, gs, vs, (vs) ? vs->cache_offset + vs->gs_offsets[num_vertices - 1] : 0, p->cp); } @@ -666,8 +664,7 @@ gen6_pipeline_wm(struct ilo_3d_pipeline *p, if (p->dev->gen == ILO_GEN(6) && session->hw_ctx_changed) gen6_wa_pipe_control_wm_max_threads_stall(p); - p->gen6_3DSTATE_WM(p->dev, - fs, ilo->max_wm_threads, num_samplers, + p->gen6_3DSTATE_WM(p->dev, fs, num_samplers, ilo->rasterizer, dual_blend, cc_may_kill, p->cp); } } diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c index 3a9def4..91fa7f4 100644 --- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c @@ -206,7 +206,7 @@ gen7_pipeline_common_urb(struct ilo_3d_pipeline *p, vs_entry_size = ilo->vertex_elements->num_elements; vs_entry_size *= sizeof(float) * 4; - vs_total_size = ilo->urb.size * 1024 - offset; + vs_total_size = ilo->dev->urb_size - offset; gen7_wa_pipe_control_vs_depth_stall(p); @@ -361,7 +361,7 @@ gen7_pipeline_gs(struct ilo_3d_pipeline *p, /* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */ if (session->hw_ctx_changed) { p->gen6_3DSTATE_CONSTANT_GS(p->dev, 0, 0, 0, p->cp); - p->gen7_3DSTATE_GS(p->dev, NULL, 0, 0, p->cp); + p->gen7_3DSTATE_GS(p->dev, NULL, 0, p->cp); } /* 3DSTATE_BINDING_TABLE_POINTERS_GS */ @@ -466,9 +466,7 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p, if (fs) assert(!fs->pcb.clip_state_size); - p->gen7_3DSTATE_PS(p->dev, - fs, ilo->max_wm_threads, num_samplers, - dual_blend, p->cp); + p->gen7_3DSTATE_PS(p->dev, fs, num_samplers, dual_blend, p->cp); } /* 3DSTATE_SCISSOR_STATE_POINTERS */ diff --git a/src/gallium/drivers/ilo/ilo_common.h b/src/gallium/drivers/ilo/ilo_common.h index d86b10b..e00d013 100644 --- a/src/gallium/drivers/ilo/ilo_common.h +++ b/src/gallium/drivers/ilo/ilo_common.h @@ -62,6 +62,7 @@ struct ilo_dev_info { int gen; int gt; + int urb_size; }; extern int ilo_debug; diff --git a/src/gallium/drivers/ilo/ilo_context.c b/src/gallium/drivers/ilo/ilo_context.c index 02010ee..a8c1b1b 100644 --- a/src/gallium/drivers/ilo/ilo_context.c +++ b/src/gallium/drivers/ilo/ilo_context.c @@ -137,44 +137,6 @@ ilo_context_create(struct pipe_screen *screen, void *priv) ilo->winsys = is->winsys; ilo->dev = &is->dev; - /* stolen from classic i965 */ - /* WM maximum threads is number of EUs times number of threads per EU. */ - if (ilo->dev->gen >= ILO_GEN(7)) { - if (ilo->dev->gt == 1) { - ilo->max_wm_threads = 48; - ilo->max_vs_threads = 36; - ilo->max_gs_threads = 36; - ilo->urb.size = 128; - ilo->urb.max_vs_entries = 512; - ilo->urb.max_gs_entries = 192; - } else if (ilo->dev->gt == 2) { - ilo->max_wm_threads = 172; - ilo->max_vs_threads = 128; - ilo->max_gs_threads = 128; - ilo->urb.size = 256; - ilo->urb.max_vs_entries = 704; - ilo->urb.max_gs_entries = 320; - } else { - assert(!"Unknown gen7 device."); - } - } else if (ilo->dev->gen == ILO_GEN(6)) { - if (ilo->dev->gt == 2) { - ilo->max_wm_threads = 80; - ilo->max_vs_threads = 60; - ilo->max_gs_threads = 60; - ilo->urb.size = 64; /* volume 5c.5 section 5.1 */ - ilo->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */ - ilo->urb.max_gs_entries = 256; - } else { - ilo->max_wm_threads = 40; - ilo->max_vs_threads = 24; - ilo->max_gs_threads = 21; /* conservative; 24 if rendering disabled */ - ilo->urb.size = 32; /* volume 5c.5 section 5.1 */ - ilo->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */ - ilo->urb.max_gs_entries = 256; - } - } - ilo->cp = ilo_cp_create(ilo->winsys, is->dev.has_llc); ilo->shader_cache = ilo_shader_cache_create(ilo->winsys); if (ilo->cp) diff --git a/src/gallium/drivers/ilo/ilo_context.h b/src/gallium/drivers/ilo/ilo_context.h index 8396fef..3e12bf1 100644 --- a/src/gallium/drivers/ilo/ilo_context.h +++ b/src/gallium/drivers/ilo/ilo_context.h @@ -73,15 +73,6 @@ struct ilo_context { struct intel_winsys *winsys; struct ilo_dev_info *dev; - int max_vs_threads; - int max_gs_threads; - int max_wm_threads; - struct { - int size; - int max_vs_entries; - int max_gs_entries; - } urb; - struct ilo_cp *cp; struct intel_bo *last_cp_bo; diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.c b/src/gallium/drivers/ilo/ilo_gpe_gen6.c index b0949de..b3efe2a 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.c @@ -1021,13 +1021,13 @@ gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev, static void gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev, const struct ilo_shader *vs, - int max_threads, int num_samplers, + int num_samplers, struct ilo_cp *cp) { const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10); const uint8_t cmd_len = 6; uint32_t dw2, dw4, dw5; - int vue_read_len; + int vue_read_len, max_threads; ILO_GPE_VALID_GEN(dev, 6, 7); @@ -1057,6 +1057,36 @@ gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev, if (!vue_read_len) vue_read_len = 1; + switch (dev->gen) { + case ILO_GEN(6): + /* + * From the Sandy Bridge PRM, volume 1 part 1, page 22: + * + * "Device # of EUs #Threads/EU + * SNB GT2 12 5 + * SNB GT1 6 4" + */ + max_threads = (dev->gt == 2) ? 60 : 24; + break; + case ILO_GEN(7): + /* + * From the Ivy Bridge PRM, volume 1 part 1, page 18: + * + * "Device # of EUs #Threads/EU + * Ivy Bridge (GT2) 16 8 + * Ivy Bridge (GT1) 6 6" + */ + max_threads = (dev->gt == 2) ? 128 : 36; + break; + case ILO_GEN(7.5): + /* see brwCreateContext() */ + max_threads = (dev->gt == 2) ? 280 : 70; + break; + default: + max_threads = 1; + break; + } + dw2 = ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT; if (false) dw2 |= GEN6_VS_FLOATING_POINT_MODE_ALT; @@ -1086,7 +1116,7 @@ gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev, static void gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev, const struct ilo_shader *gs, - int max_threads, const struct ilo_shader *vs, + const struct ilo_shader *vs, uint32_t vs_offset, struct ilo_cp *cp) { @@ -1105,7 +1135,7 @@ gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev, dw6 = 0; } else { - int vue_read_len; + int max_threads, vue_read_len; /* * From the Sandy Bridge PRM, volume 2 part 1, page 154: @@ -1124,6 +1154,15 @@ gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev, * * As such, we always enable rendering, and limit the number of threads. */ + if (dev->gt == 2) { + /* maximum is 60, but limited to 28 */ + max_threads = 28; + } + else { + /* maximum is 24, but limited to 21 (see brwCreateContext()) */ + max_threads = 21; + } + if (max_threads > 28) max_threads = 28; @@ -1798,7 +1837,7 @@ gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev, static void gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev, const struct ilo_shader *fs, - int max_threads, int num_samplers, + int num_samplers, const struct pipe_rasterizer_state *rasterizer, bool dual_blend, bool cc_may_kill, struct ilo_cp *cp) @@ -1807,9 +1846,13 @@ gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev, const uint8_t cmd_len = 9; const int num_samples = 1; uint32_t dw2, dw4, dw5, dw6; + int max_threads; ILO_GPE_VALID_GEN(dev, 6, 6); + /* see brwCreateContext() */ + max_threads = (dev->gt == 2) ? 80 : 40; + if (!fs) { ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.h b/src/gallium/drivers/ilo/ilo_gpe_gen6.h index f976890..62166d9 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.h +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.h @@ -236,13 +236,13 @@ typedef void typedef void (*ilo_gpe_gen6_3DSTATE_VS)(const struct ilo_dev_info *dev, const struct ilo_shader *vs, - int max_threads, int num_samplers, + int num_samplers, struct ilo_cp *cp); typedef void (*ilo_gpe_gen6_3DSTATE_GS)(const struct ilo_dev_info *dev, const struct ilo_shader *gs, - int max_threads, const struct ilo_shader *vs, + const struct ilo_shader *vs, uint32_t vs_offset, struct ilo_cp *cp); @@ -264,7 +264,7 @@ typedef void typedef void (*ilo_gpe_gen6_3DSTATE_WM)(const struct ilo_dev_info *dev, const struct ilo_shader *fs, - int max_threads, int num_samplers, + int num_samplers, const struct pipe_rasterizer_state *rasterizer, bool dual_blend, bool cc_may_kill, struct ilo_cp *cp); diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.c b/src/gallium/drivers/ilo/ilo_gpe_gen7.c index 56d1ec2..6139332 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.c @@ -95,15 +95,25 @@ gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev, static void gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev, const struct ilo_shader *gs, - int max_threads, int num_samplers, + int num_samplers, struct ilo_cp *cp) { const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11); const uint8_t cmd_len = 7; uint32_t dw2, dw4, dw5; + int max_threads; ILO_GPE_VALID_GEN(dev, 7, 7); + switch (dev->gen) { + case ILO_GEN(7): + max_threads = (dev->gt == 2) ? 128 : 36; + break; + default: + max_threads = 1; + break; + } + if (!gs) { ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); @@ -597,27 +607,18 @@ gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev, static void gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev, const struct ilo_shader *fs, - int max_threads, int num_samplers, - bool dual_blend, + int num_samplers, bool dual_blend, struct ilo_cp *cp) { const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20); const uint8_t cmd_len = 8; uint32_t dw2, dw4, dw5; + int max_threads; ILO_GPE_VALID_GEN(dev, 7, 7); - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 286: - * - * "This field (Maximum Number of Threads) must have an odd value so - * that the max number of PS threads is even." - */ - max_threads &= ~1; - - /* the valid range is [4, 48] */ - if (max_threads < 4) - max_threads = 4; + /* see brwCreateContext() */ + max_threads = (dev->gt == 2) ? 172 : 48; if (!fs) { ilo_cp_begin(cp, cmd_len); @@ -793,7 +794,7 @@ gen7_emit_3dstate_urb(const struct ilo_dev_info *dev, const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop); const uint8_t cmd_len = 2; const int row_size = 64; /* 512 bits */ - int alloc_size, num_entries; + int alloc_size, num_entries, min_entries, max_entries; ILO_GPE_VALID_GEN(dev, 7, 7); @@ -824,16 +825,27 @@ gen7_emit_3dstate_urb(const struct ilo_dev_info *dev, switch (subop) { case 0x30: /* 3DSTATE_URB_VS */ - assert(num_entries >= 32); - if (dev->gt == 2 && num_entries > 704) - num_entries = 704; - else if (dev->gt == 1 && num_entries > 512) - num_entries = 512; + min_entries = 32; + max_entries = (dev->gt == 2) ? 704 : 512; + + assert(num_entries >= min_entries); + if (num_entries > max_entries) + num_entries = max_entries; + break; + case 0x31: /* 3DSTATE_URB_HS */ + max_entries = (dev->gt == 2) ? 64 : 32; + if (num_entries > max_entries) + num_entries = max_entries; break; case 0x32: /* 3DSTATE_URB_DS */ if (num_entries) assert(num_entries >= 138); break; + case 0x33: /* 3DSTATE_URB_GS */ + max_entries = (dev->gt == 2) ? 320 : 192; + if (num_entries > max_entries) + num_entries = max_entries; + break; default: break; } diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.h b/src/gallium/drivers/ilo/ilo_gpe_gen7.h index 727542d..d9626e1 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.h +++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.h @@ -166,7 +166,7 @@ typedef ilo_gpe_gen6_3DSTATE_VS ilo_gpe_gen7_3DSTATE_VS; typedef void (*ilo_gpe_gen7_3DSTATE_GS)(const struct ilo_dev_info *dev, const struct ilo_shader *gs, - int max_threads, int num_samplers, + int num_samplers, struct ilo_cp *cp); typedef ilo_gpe_gen6_3DSTATE_CLIP ilo_gpe_gen7_3DSTATE_CLIP; @@ -239,8 +239,7 @@ typedef void typedef void (*ilo_gpe_gen7_3DSTATE_PS)(const struct ilo_dev_info *dev, const struct ilo_shader *fs, - int max_threads, int num_samplers, - bool dual_blend, + int num_samplers, bool dual_blend, struct ilo_cp *cp); typedef void diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index c74efda..1e1e751 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -626,29 +626,57 @@ init_dev(struct ilo_dev_info *dev, const struct intel_winsys_info *info) dev->has_gen7_sol_reset = info->has_gen7_sol_reset; dev->has_llc = info->has_llc; + /* + * From the Sandy Bridge PRM, volume 4 part 2, page 18: + * + * "[DevSNB]: The GT1 product's URB provides 32KB of storage, arranged + * as 1024 256-bit rows. The GT2 product's URB provides 64KB of + * storage, arranged as 2048 256-bit rows. A row corresponds in size + * to an EU GRF register. Read/write access to the URB is generally + * supported on a row-granular basis." + * + * From the Ivy Bridge PRM, volume 4 part 2, page 17: + * + * "URB Size URB Rows URB Rows when SLM Enabled + * 128k 4096 2048 + * 256k 8096 4096" + */ + if (IS_HASWELL(info->devid)) { dev->gen = ILO_GEN(7.5); - if (IS_HSW_GT2(info->devid)) + if (IS_HSW_GT2(info->devid)) { dev->gt = 2; - else + dev->urb_size = 256 * 1024; + } + else { dev->gt = 1; + dev->urb_size = 128 * 1024; + } } else if (IS_GEN7(info->devid)) { dev->gen = ILO_GEN(7); - if (IS_IVB_GT2(info->devid)) + if (IS_IVB_GT2(info->devid)) { dev->gt = 2; - else + dev->urb_size = 256 * 1024; + } + else { dev->gt = 1; + dev->urb_size = 128 * 1024; + } } else if (IS_GEN6(info->devid)) { dev->gen = ILO_GEN(6); - if (IS_SNB_GT2(info->devid)) + if (IS_SNB_GT2(info->devid)) { dev->gt = 2; - else + dev->urb_size = 64 * 1024; + } + else { dev->gt = 1; + dev->urb_size = 32 * 1024; + } } else { ilo_err("unknown GPU generation\n"); -- 2.7.4