haswell: fix render kernels.
author: Gwenole Beauchesne <gwenole.beauchesne@intel.com>
Tue, 18 Sep 2012 13:40:03 +0000 (09:40 -0400)
committer: Xiang, Haihao <haihao.xiang@intel.com>
Tue, 23 Oct 2012 05:56:02 +0000 (13:56 +0800)
Regenerate render kernels for Haswell because JMPI instruction semantics
changed there. In particular, the offset is now expressed in bytes instead
of 64-bit units.

Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne@intel.com>
src/i965_render.c
src/shaders/render/Makefile.am
src/shaders/render/exa_wm_src_sample_planar.g7b.haswell [new file with mode: 0644]

diff --git a/src/i965_render.c b/src/i965_render.c
index ecb2217..9b277af 100644 (file)
@@ -131,6 +131,14 @@ static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
 #include "shaders/render/exa_wm_write.g7b"
 };
 
+/* Programs for Haswell */
+static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
+#include "shaders/render/exa_wm_src_affine.g7b"
+#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
+#include "shaders/render/exa_wm_yuv_rgb.g7b"
+#include "shaders/render/exa_wm_write.g7b"
+};
+
 #define SURFACE_STATE_PADDED_SIZE_I965  ALIGN(sizeof(struct i965_surface_state), 32)
 #define SURFACE_STATE_PADDED_SIZE_GEN7  ALIGN(sizeof(struct gen7_surface_state), 32)
 #define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
@@ -255,6 +263,31 @@ static struct i965_kernel render_kernels_gen7[] = {
     }
 };
 
+static struct i965_kernel render_kernels_gen7_haswell[] = {
+    {
+        "SF",
+        SF_KERNEL,
+        sf_kernel_static_gen7,
+        sizeof(sf_kernel_static_gen7),
+        NULL
+    },
+    {
+        "PS",
+        PS_KERNEL,
+        ps_kernel_static_gen7_haswell,
+        sizeof(ps_kernel_static_gen7_haswell),
+        NULL
+    },
+
+    {
+        "PS_SUBPIC",
+        PS_SUBPIC_KERNEL,
+        ps_subpic_kernel_static_gen7,
+        sizeof(ps_subpic_kernel_static_gen7),
+        NULL
+    }
+};
+
 #define URB_VS_ENTRIES       8
 #define URB_VS_ENTRY_SIZE     1
 
@@ -3035,7 +3068,9 @@ i965_render_init(VADriverContextP ctx)
                                  sizeof(render_kernels_gen6[0])));
 
     if (IS_GEN7(i965->intel.device_id))
-        memcpy(render_state->render_kernels, render_kernels_gen7, sizeof(render_state->render_kernels));
+        memcpy(render_state->render_kernels,
+               (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7),
+               sizeof(render_state->render_kernels));
     else if (IS_GEN6(i965->intel.device_id))
         memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
     else if (IS_IRONLAKE(i965->intel.device_id))
diff --git a/src/shaders/render/Makefile.am b/src/shaders/render/Makefile.am
index f9540b0..dac58c7 100644 (file)
@@ -64,17 +64,23 @@ INTEL_G7B =                         \
        exa_wm_write.g7b                \
        exa_wm_yuv_rgb.g7b
 
+# XXX: only regenerate binary for EU code containing JMPI instructions
+INTEL_G7B_HASWELL = \
+       exa_wm_src_sample_planar.g7b.haswell    \
+       $(NULL)
+
 TARGETS  =
 if HAVE_GEN4ASM
 TARGETS += $(INTEL_G4B)
 TARGETS += $(INTEL_G4B_GEN5)
 TARGETS += $(INTEL_G6B)
 TARGETS += $(INTEL_G7B)
+TARGETS += $(INTEL_G7B_HASWELL)
 endif
 
 all-local: $(TARGETS)
 
-SUFFIXES = .g4a .g4s .g4b .g6a .g6s .g6b .g7a .g7s .g7b
+SUFFIXES = .g4a .g4s .g4b .g6a .g6s .g6b .g7a .g7s .g7b .g7b.haswell
 
 if HAVE_GEN4ASM
 $(INTEL_G4S): $(INTEL_G4A) $(INTEL_G4I)
@@ -96,6 +102,8 @@ $(INTEL_G7S): $(INTEL_G7A) $(INTEL_G7I)
        $(AM_V_GEN)m4 $< > $@
 .g7s.g7b:
        $(AM_V_GEN)$(GEN4ASM) -g 7 -o $@ $<
+.g7s.g7b.haswell:
+       $(AM_V_GEN)$(GEN4ASM) -g 7.5 -o $@ $<
 endif
 
 CLEANFILES = \
@@ -113,6 +121,7 @@ EXTRA_DIST = \
        $(INTEL_G6B)            \
        $(INTEL_G7A)            \
        $(INTEL_G7B)            \
+       $(INTEL_G7B_HASWELL)    \
        $(NULL)
 
 # Extra clean files so that maintainer-clean removes *everything*
diff --git a/src/shaders/render/exa_wm_src_sample_planar.g7b.haswell b/src/shaders/render/exa_wm_src_sample_planar.g7b.haswell
new file mode 100644 (file)
index 0000000..dc388c2
--- /dev/null
@@ -0,0 +1,20 @@
+   { 0x01000010, 0x20002d3c, 0x000000c0, 0x00010001 },
+   { 0x00010020, 0x34001c00, 0x00001400, 0x000000c0 },
+   { 0x01000010, 0x20002d3c, 0x000000c0, 0x00020002 },
+   { 0x00010020, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
+   { 0x00600201, 0x28200021, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x22001ca9, 0x00000820, 0x0a2c0203 },
+   { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
+   { 0x00600201, 0x28200021, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x22401ca9, 0x00000820, 0x0a2c0405 },
+   { 0x00000020, 0x34001c00, 0x00001400, 0x00000060 },
+   { 0x00800201, 0x220003fd, 0x00000000, 0x3f000000 },
+   { 0x00800201, 0x224003fd, 0x00000000, 0x3f000000 },
+   { 0x00000020, 0x34001c00, 0x00001400, 0x00000030 },
+   { 0x00000201, 0x20080061, 0x00000000, 0x0000c000 },
+   { 0x00600201, 0x28200021, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x22001ca9, 0x00000820, 0x0a4c0203 },
+   { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
+   { 0x00600201, 0x28200021, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x21c01ca9, 0x00000820, 0x0a2c0001 },