Render: Update the maximum number of WM threads
author Xiang, Haihao <haihao.xiang@intel.com>
Fri, 21 Dec 2012 02:25:57 +0000 (10:25 +0800)
committer Xiang, Haihao <haihao.xiang@intel.com>
Fri, 28 Dec 2012 01:35:43 +0000 (09:35 +0800)
The number is stolen from Mesa.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=57323
Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
Tested-by: zaverel <zaverel@free.fr>
(cherry picked from commit a140c632046e50a41bf75da097834fd9954b9561)

src/i965_render.c
src/i965_render.h

index 53237e4..f14a80a 100644 (file)
@@ -450,12 +450,11 @@ i965_subpic_render_wm_unit(VADriverContextP ctx)
 
     if (IS_IRONLAKE(i965->intel.device_id)) {
         wm_state->wm4.sampler_count = 0;        /* hardware requirement */
-        wm_state->wm5.max_threads = 12 * 6 - 1;
     } else {
         wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
-        wm_state->wm5.max_threads = 10 * 5 - 1;
     }
 
+    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
     wm_state->wm5.thread_dispatch_enable = 1;
     wm_state->wm5.enable_16_pix = 1;
     wm_state->wm5.enable_8_pix = 0;
@@ -515,12 +514,11 @@ i965_render_wm_unit(VADriverContextP ctx)
 
     if (IS_IRONLAKE(i965->intel.device_id)) {
         wm_state->wm4.sampler_count = 0;        /* hardware requirement */
-        wm_state->wm5.max_threads = 12 * 6 - 1;
     } else {
         wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
-        wm_state->wm5.max_threads = 10 * 5 - 1;
     }
 
+    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
     wm_state->wm5.thread_dispatch_enable = 1;
     wm_state->wm5.enable_16_pix = 1;
     wm_state->wm5.enable_8_pix = 0;
@@ -2074,7 +2072,7 @@ gen6_emit_wm_state(VADriverContextP ctx, int kernel)
               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
     OUT_BATCH(batch, 0);
     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
-    OUT_BATCH(batch, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
+    OUT_BATCH(batch, ((render_state->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
@@ -2850,7 +2848,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel)
               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
     OUT_BATCH(batch, 0); /* scratch space base offset */
     OUT_BATCH(batch, 
-              ((86 - 1) << max_threads_shift) | num_samples |
+              ((render_state->max_wm_threads - 1) << max_threads_shift) | num_samples |
               GEN7_PS_PUSH_CONSTANT_ENABLE |
               GEN7_PS_ATTRIBUTE_ENABLE |
               GEN7_PS_16_DISPATCH_ENABLE);
@@ -3129,6 +3127,25 @@ i965_render_init(VADriverContextP ctx)
                       4096, 64);
     assert(render_state->curbe.bo);
 
+    if (IS_IVB_GT1(i965->intel.device_id) ||
+        IS_HSW_GT1(i965->intel.device_id)) {
+        render_state->max_wm_threads = 48;
+    } else if (IS_IVB_GT2(i965->intel.device_id) ||
+               IS_HSW_GT2(i965->intel.device_id)) {
+        render_state->max_wm_threads = 172;
+    } else if (IS_SNB_GT1(i965->intel.device_id)) {
+        render_state->max_wm_threads = 40;
+    } else if (IS_SNB_GT2(i965->intel.device_id)) {
+        render_state->max_wm_threads = 80;
+    } else if (IS_IRONLAKE(i965->intel.device_id)) {
+        render_state->max_wm_threads = 72; /* 12 * 6 */
+    } else if (IS_G4X(i965->intel.device_id)) {
+        render_state->max_wm_threads = 50; /* 10 * 5 */
+    } else {
+        /* should never get here !!! */
+        assert(0);
+    }
+
     return True;
 }
 
index c2fc2bf..44a9529 100644 (file)
@@ -76,6 +76,8 @@ struct i965_render_state
     int pp_flag; /* 0: disable, 1: enable */
 
     struct i965_kernel render_kernels[3];
+    
+    int max_wm_threads;
 };
 
 Bool i965_render_init(VADriverContextP ctx);