Started to implement Gen code generation
author Benjamin Segovia <segovia.benjamin@gmail.com>
Wed, 21 Mar 2012 03:57:42 +0000 (20:57 -0700)
committer Keith Packard <keithp@keithp.com>
Fri, 10 Aug 2012 23:15:47 +0000 (16:15 -0700)
15 files changed:
backend/CMakeLists.txt
backend/kernels/Makefile [new file with mode: 0644]
backend/src/CMakeLists.txt
backend/src/gen/brw_chipset.h [new file with mode: 0644]
backend/src/gen/brw_defines.h [new file with mode: 0644]
backend/src/gen/brw_disasm.c
backend/src/gen/brw_eu.c
backend/src/gen/brw_eu.h
backend/src/gen/brw_eu_emit.c
backend/src/gen/brw_structs.h [new file with mode: 0644]
backend/src/gen/program.cpp [new file with mode: 0644]
backend/src/gen/program.h [new file with mode: 0644]
backend/src/gen/program.hpp [new file with mode: 0644]
backend/src/llvm/llvm_to_gen.cpp
backend/src/llvm/llvm_to_gen.hpp

index cea4e58..75d6c5f 100644 (file)
@@ -53,7 +53,8 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
   # set (VISIBILITY_FLAG "-fvisibility=hidden")
 
   if (COMPILER STREQUAL "GCC")
-    set (CMAKE_CXX_FLAGS "-Wstrict-aliasing=2 -Wno-invalid-offsetof -fstrict-aliasing -msse2 -ffast-math -fPIC -Wall -fno-rtti -std=c++0x")
+    set (CMAKE_C_CXX_FLAGS "-Wstrict-aliasing=2 -fstrict-aliasing -msse2 -ffast-math -fPIC -Wall")
+    set (CMAKE_CXX_FLAGS "${CMAKE_C_CXX_FLAGS}  -Wno-invalid-offsetof -fno-rtti -std=c++0x")
     set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
     set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
     set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${VISIBILITY_FLAG} -Wl,-E")
@@ -64,6 +65,14 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
     set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
     set (CMAKE_CXX_FLAGS_MINSIZEREL     "-Os -DNDEBUG -DGBE_DEBUG=0")
     set (CMAKE_CXX_FLAGS_RELEASE        "-O3 -DNDEBUG -DGBE_DEBUG=0")
+    set (CMAKE_C_FLAGS "${CMAKE_C_CXX_FLAGS}")
+    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
+    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
+    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${VISIBILITY_FLAG} -Wl,-E")
+    set (CMAKE_C_FLAGS_DEBUG          "-g -DGBE_DEBUG=1")
+    set (CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
+    set (CMAKE_C_FLAGS_MINSIZEREL     "-Os -DNDEBUG -DGBE_DEBUG=0")
+    set (CMAKE_C_FLAGS_RELEASE        "-O3 -DNDEBUG -DGBE_DEBUG=0")
   elseif (COMPILER STREQUAL "CLANG")
     set (CMAKE_C_COMPILER             "clang")
     set (CMAKE_C_FLAGS                "-Wall -std=c99")
diff --git a/backend/kernels/Makefile b/backend/kernels/Makefile
new file mode 100644 (file)
index 0000000..c5a0ebd
--- /dev/null
@@ -0,0 +1,34 @@
+%.ll : %.cl Makefile stdlib.h
+       ./compile.sh $<
+
+all: add.ll\
+       add2.ll\
+       cmp.ll\
+       cmp_cvt.ll\
+       complex_struct.ll\
+       cycle.ll\
+       extract.ll\
+       function.ll\
+       function_param.ll\
+       get_global_id.ll\
+       insert.ll\
+       load_store.ll\
+       loop.ll\
+       loop2.ll\
+       loop3.ll\
+       loop4.ll\
+       loop5.ll\
+       select.ll\
+       short.ll\
+       shuffle.ll\
+       simple_float4.ll\
+       simple_float4_2.ll\
+       simple_float4_3.ll\
+       store.ll\
+       struct.ll\
+       struct2.ll\
+       test_select.ll\
+       undefined.ll\
+       vector_constant.ll\
+       void.ll
+.PHONY: all # 'all' names no file; without this a stray file called "all" would make the goal look up to date
index 0560191..69b542a 100644 (file)
@@ -42,7 +42,10 @@ else (GBE_USE_BLOB)
     ir/function.hpp
     ir/value.cpp
     ir/value.hpp
-    gen/brw_disasm.c)
+    gen/program.cpp
+    gen/brw_disasm.c
+    gen/brw_eu_emit.c
+    gen/brw_eu.c)
 
   if (GBE_COMPILE_UTESTS)
     set (GBE_SRC
diff --git a/backend/src/gen/brw_chipset.h b/backend/src/gen/brw_chipset.h
new file mode 100644 (file)
index 0000000..c2a06bb
--- /dev/null
@@ -0,0 +1,191 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+ /*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#define PCI_CHIP_I810                  0x7121 /* the four I810/I815 IDs are not matched by any IS_* predicate in this header */
+#define PCI_CHIP_I810_DC100            0x7123
+#define PCI_CHIP_I810_E                        0x7125
+#define PCI_CHIP_I815                  0x1132
+
+#define PCI_CHIP_I830_M                        0x3577 /* this group of four is what IS_GEN2 matches */
+#define PCI_CHIP_845_G                 0x2562
+#define PCI_CHIP_I855_GM               0x3582
+#define PCI_CHIP_I865_G                        0x2572
+
+#define PCI_CHIP_I915_G                        0x2582 /* I915_G, E7221_G and I915_GM are matched by IS_915 */
+#define PCI_CHIP_E7221_G               0x258A
+#define PCI_CHIP_I915_GM               0x2592
+#define PCI_CHIP_I945_G                        0x2772 /* the I945_* IDs are matched by IS_945 */
+#define PCI_CHIP_I945_GM               0x27A2
+#define PCI_CHIP_I945_GME              0x27AE
+
+#define PCI_CHIP_Q35_G                 0x29B2 /* Q35/G33/Q33 are also matched by IS_945 */
+#define PCI_CHIP_G33_G                 0x29C2
+#define PCI_CHIP_Q33_G                 0x29D2
+
+#define PCI_CHIP_IGD_GM                        0xA011 /* both IGD IDs are matched by IS_IGD, which IS_945 and IS_MOBILE include */
+#define PCI_CHIP_IGD_G                 0xA001
+
+#define IS_IGDGM(devid)        ((devid) == PCI_CHIP_IGD_GM) /* (devid) is parenthesized in every predicate so an expression argument is compared as a whole */
+#define IS_IGDG(devid)         ((devid) == PCI_CHIP_IGD_G)
+#define IS_IGD(devid)          (IS_IGDG(devid) || IS_IGDGM(devid))
+
+#define PCI_CHIP_I965_G                 0x29A2 /* the I965_* IDs and I946_GZ are matched by IS_GEN4 */
+#define PCI_CHIP_I965_Q                 0x2992
+#define PCI_CHIP_I965_G_1               0x2982
+#define PCI_CHIP_I946_GZ                0x2972
+#define PCI_CHIP_I965_GM                0x2A02
+#define PCI_CHIP_I965_GME               0x2A12
+
+#define PCI_CHIP_GM45_GM                0x2A42 /* matched by IS_GM45 */
+
+#define PCI_CHIP_IGD_E_G                0x2E02 /* this group of six is what IS_G45 matches */
+#define PCI_CHIP_Q45_G                  0x2E12
+#define PCI_CHIP_G45_G                  0x2E22
+#define PCI_CHIP_G41_G                  0x2E32
+#define PCI_CHIP_B43_G                  0x2E42
+#define PCI_CHIP_B43_G1                 0x2E92
+
+#define PCI_CHIP_ILD_G                  0x0042 /* ILD/ILM are matched by IS_GEN5 */
+#define PCI_CHIP_ILM_G                  0x0046
+
+#define PCI_CHIP_SANDYBRIDGE_GT1        0x0102  /* Desktop */
+#define PCI_CHIP_SANDYBRIDGE_GT2        0x0112
+#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS   0x0122
+#define PCI_CHIP_SANDYBRIDGE_M_GT1      0x0106  /* Mobile */
+#define PCI_CHIP_SANDYBRIDGE_M_GT2      0x0116
+#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
+#define PCI_CHIP_SANDYBRIDGE_S          0x010A  /* Server */
+
+#define PCI_CHIP_IVYBRIDGE_GT1          0x0152  /* Desktop */
+#define PCI_CHIP_IVYBRIDGE_GT2          0x0162
+#define PCI_CHIP_IVYBRIDGE_M_GT1        0x0156  /* Mobile */
+#define PCI_CHIP_IVYBRIDGE_M_GT2        0x0166
+#define PCI_CHIP_IVYBRIDGE_S_GT1        0x015a  /* Server */
+/* Chipset predicates: each expands devid more than once, so pass an expression without side effects. */
+#define IS_MOBILE(devid)       ((devid) == PCI_CHIP_I855_GM || \
+                                (devid) == PCI_CHIP_I915_GM || \
+                                (devid) == PCI_CHIP_I945_GM || \
+                                (devid) == PCI_CHIP_I945_GME || \
+                                (devid) == PCI_CHIP_I965_GM || \
+                                (devid) == PCI_CHIP_I965_GME || \
+                                (devid) == PCI_CHIP_GM45_GM || \
+                                IS_IGD(devid) || \
+                                (devid) == PCI_CHIP_ILM_G)
+
+#define IS_G45(devid)          ((devid) == PCI_CHIP_IGD_E_G || \
+                                (devid) == PCI_CHIP_Q45_G || \
+                                (devid) == PCI_CHIP_G45_G || \
+                                (devid) == PCI_CHIP_G41_G || \
+                                (devid) == PCI_CHIP_B43_G || \
+                                (devid) == PCI_CHIP_B43_G1)
+#define IS_GM45(devid)         ((devid) == PCI_CHIP_GM45_GM)
+#define IS_G4X(devid)          (IS_G45(devid) || IS_GM45(devid))
+
+#define IS_ILD(devid)          ((devid) == PCI_CHIP_ILD_G)
+#define IS_ILM(devid)          ((devid) == PCI_CHIP_ILM_G)
+#define IS_GEN5(devid)         (IS_ILD(devid) || IS_ILM(devid))
+
+#define IS_915(devid)          ((devid) == PCI_CHIP_I915_G || \
+                                (devid) == PCI_CHIP_E7221_G || \
+                                (devid) == PCI_CHIP_I915_GM)
+
+#define IS_945(devid)          ((devid) == PCI_CHIP_I945_G || \
+                                (devid) == PCI_CHIP_I945_GM || \
+                                (devid) == PCI_CHIP_I945_GME || \
+                                (devid) == PCI_CHIP_G33_G || \
+                                (devid) == PCI_CHIP_Q33_G || \
+                                (devid) == PCI_CHIP_Q35_G || IS_IGD(devid))
+
+#define IS_GEN4(devid)         ((devid) == PCI_CHIP_I965_G || \
+                                (devid) == PCI_CHIP_I965_Q || \
+                                (devid) == PCI_CHIP_I965_G_1 || \
+                                (devid) == PCI_CHIP_I965_GM || \
+                                (devid) == PCI_CHIP_I965_GME || \
+                                (devid) == PCI_CHIP_I946_GZ || \
+                                IS_G4X(devid))
+
+/* Compat macro for intel_decode.c */
+#define IS_IRONLAKE(devid)     IS_GEN5(devid)
+
+#define IS_SNB_GT1(devid)      ((devid) == PCI_CHIP_SANDYBRIDGE_GT1 || \
+                                (devid) == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
+                                (devid) == PCI_CHIP_SANDYBRIDGE_S)
+
+#define IS_SNB_GT2(devid)      ((devid) == PCI_CHIP_SANDYBRIDGE_GT2 || \
+                                (devid) == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \
+                                (devid) == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
+                                (devid) == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS)
+
+#define IS_GEN6(devid)         (IS_SNB_GT1(devid) || IS_SNB_GT2(devid))
+
+#define IS_IVB_GT1(devid)      ((devid) == PCI_CHIP_IVYBRIDGE_GT1 || \
+                                (devid) == PCI_CHIP_IVYBRIDGE_M_GT1 || \
+                                (devid) == PCI_CHIP_IVYBRIDGE_S_GT1)
+
+#define IS_IVB_GT2(devid)      ((devid) == PCI_CHIP_IVYBRIDGE_GT2 || \
+                                (devid) == PCI_CHIP_IVYBRIDGE_M_GT2)
+
+#define IS_IVYBRIDGE(devid)    (IS_IVB_GT1(devid) || IS_IVB_GT2(devid))
+
+#define IS_GEN7(devid)         IS_IVYBRIDGE(devid)
+
+#define IS_965(devid)          (IS_GEN4(devid) || \
+                                IS_G4X(devid) || /* already implied by IS_GEN4; kept so the list matches the original */ \
+                                IS_GEN5(devid) || \
+                                IS_GEN6(devid) || \
+                                IS_GEN7(devid))
+
+#define IS_9XX(devid)          (IS_915(devid) || \
+                                IS_945(devid) || \
+                                IS_965(devid))
+
+#define IS_GEN3(devid)         (IS_915(devid) || \
+                                IS_945(devid))
+
+#define IS_GEN2(devid)         ((devid) == PCI_CHIP_I830_M || \
+                                (devid) == PCI_CHIP_845_G || \
+                                (devid) == PCI_CHIP_I855_GM || \
+                                (devid) == PCI_CHIP_I865_G)
diff --git a/backend/src/gen/brw_defines.h b/backend/src/gen/brw_defines.h
new file mode 100644 (file)
index 0000000..e991a84
--- /dev/null
@@ -0,0 +1,1499 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#define INTEL_MASK(high, low) (((1u<<((high)-(low)+1))-1)<<(low)) /* bits high..low set; built from 1u because masks reaching bit 31 overflow a signed int */
+#define SET_FIELD(value, field) (((unsigned)(value) << field ## _SHIFT) & field ## _MASK) /* value placed at field_SHIFT, clipped to field_MASK; shifted as unsigned so bit 31 is safe */
+#define GET_FIELD(word, field) (((word)  & field ## _MASK) >> field ## _SHIFT) /* extracts the field_MASK bits of word, moved down to bit 0 */
+
+#ifndef BRW_DEFINES_H
+#define BRW_DEFINES_H
+
+/* 3D state:
+ */
+#define PIPE_CONTROL_NOWRITE          0x00
+#define PIPE_CONTROL_WRITEIMMEDIATE   0x01
+#define PIPE_CONTROL_WRITEDEPTH       0x02
+#define PIPE_CONTROL_WRITETIMESTAMP   0x03
+
+#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
+#define PIPE_CONTROL_GTTWRITE_GLOBAL        0x01
+
+#define CMD_3D_PRIM                                 0x7b00 /* 3DPRIMITIVE */
+/* DW0 */
+# define GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT            10
+# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15)
+# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     (1 << 15)
+/* DW1 */
+# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
+# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     (1 << 8)
+
+#define _3DPRIM_POINTLIST         0x01
+#define _3DPRIM_LINELIST          0x02
+#define _3DPRIM_LINESTRIP         0x03
+#define _3DPRIM_TRILIST           0x04
+#define _3DPRIM_TRISTRIP          0x05
+#define _3DPRIM_TRIFAN            0x06
+#define _3DPRIM_QUADLIST          0x07
+#define _3DPRIM_QUADSTRIP         0x08
+#define _3DPRIM_LINELIST_ADJ      0x09
+#define _3DPRIM_LINESTRIP_ADJ     0x0A
+#define _3DPRIM_TRILIST_ADJ       0x0B
+#define _3DPRIM_TRISTRIP_ADJ      0x0C
+#define _3DPRIM_TRISTRIP_REVERSE  0x0D
+#define _3DPRIM_POLYGON           0x0E
+#define _3DPRIM_RECTLIST          0x0F
+#define _3DPRIM_LINELOOP          0x10
+#define _3DPRIM_POINTLIST_BF      0x11
+#define _3DPRIM_LINESTRIP_CONT    0x12
+#define _3DPRIM_LINESTRIP_BF      0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE  0x15
+
+#define BRW_ANISORATIO_2     0 /* encoded value is ratio / 2 - 1: 2 -> 0 up to 16 -> 7 */
+#define BRW_ANISORATIO_4     1 
+#define BRW_ANISORATIO_6     2 
+#define BRW_ANISORATIO_8     3 
+#define BRW_ANISORATIO_10    4 
+#define BRW_ANISORATIO_12    5 
+#define BRW_ANISORATIO_14    6 
+#define BRW_ANISORATIO_16    7
+
+#define BRW_BLENDFACTOR_ONE                 0x1
+#define BRW_BLENDFACTOR_SRC_COLOR           0x2
+#define BRW_BLENDFACTOR_SRC_ALPHA           0x3
+#define BRW_BLENDFACTOR_DST_ALPHA           0x4
+#define BRW_BLENDFACTOR_DST_COLOR           0x5
+#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE  0x6 /* the only factor without an INV_ counterpart in this list */
+#define BRW_BLENDFACTOR_CONST_COLOR         0x7
+#define BRW_BLENDFACTOR_CONST_ALPHA         0x8
+#define BRW_BLENDFACTOR_SRC1_COLOR          0x9
+#define BRW_BLENDFACTOR_SRC1_ALPHA          0x0A
+#define BRW_BLENDFACTOR_ZERO                0x11 /* numerically BRW_BLENDFACTOR_ONE + 0x10 */
+#define BRW_BLENDFACTOR_INV_SRC_COLOR       0x12 /* every INV_x value equals x + 0x10 */
+#define BRW_BLENDFACTOR_INV_SRC_ALPHA       0x13
+#define BRW_BLENDFACTOR_INV_DST_ALPHA       0x14
+#define BRW_BLENDFACTOR_INV_DST_COLOR       0x15
+#define BRW_BLENDFACTOR_INV_CONST_COLOR     0x17
+#define BRW_BLENDFACTOR_INV_CONST_ALPHA     0x18
+#define BRW_BLENDFACTOR_INV_SRC1_COLOR      0x19
+#define BRW_BLENDFACTOR_INV_SRC1_ALPHA      0x1A
+
+#define BRW_BLENDFUNCTION_ADD               0
+#define BRW_BLENDFUNCTION_SUBTRACT          1
+#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT  2
+#define BRW_BLENDFUNCTION_MIN               3
+#define BRW_BLENDFUNCTION_MAX               4
+
+#define BRW_ALPHATEST_FORMAT_UNORM8         0
+#define BRW_ALPHATEST_FORMAT_FLOAT32        1
+
+#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH  0
+#define BRW_CHROMAKEY_REPLACE_BLACK      1
+
+#define BRW_CLIP_API_OGL     0
+#define BRW_CLIP_API_DX      1
+
+#define BRW_CLIPMODE_NORMAL              0
+#define BRW_CLIPMODE_CLIP_ALL            1
+#define BRW_CLIPMODE_CLIP_NON_REJECTED   2
+#define BRW_CLIPMODE_REJECT_ALL          3
+#define BRW_CLIPMODE_ACCEPT_ALL          4
+#define BRW_CLIPMODE_KERNEL_CLIP         5
+
+#define BRW_CLIP_NDCSPACE     0
+#define BRW_CLIP_SCREENSPACE  1
+
+#define BRW_COMPAREFUNCTION_ALWAYS       0
+#define BRW_COMPAREFUNCTION_NEVER        1
+#define BRW_COMPAREFUNCTION_LESS         2
+#define BRW_COMPAREFUNCTION_EQUAL        3
+#define BRW_COMPAREFUNCTION_LEQUAL       4
+#define BRW_COMPAREFUNCTION_GREATER      5
+#define BRW_COMPAREFUNCTION_NOTEQUAL     6
+#define BRW_COMPAREFUNCTION_GEQUAL       7
+
+#define BRW_COVERAGE_PIXELS_HALF     0
+#define BRW_COVERAGE_PIXELS_1        1
+#define BRW_COVERAGE_PIXELS_2        2
+#define BRW_COVERAGE_PIXELS_4        3
+
+#define BRW_CULLMODE_BOTH        0
+#define BRW_CULLMODE_NONE        1
+#define BRW_CULLMODE_FRONT       2
+#define BRW_CULLMODE_BACK        3
+
+#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM      0
+#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT  1
+
+#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT     0
+#define BRW_DEPTHFORMAT_D32_FLOAT                1
+#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT        2
+#define BRW_DEPTHFORMAT_D24_UNORM_X8_UINT        3 /* GEN5 */
+#define BRW_DEPTHFORMAT_D16_UNORM                5
+
+#define BRW_FLOATING_POINT_IEEE_754        0
+#define BRW_FLOATING_POINT_NON_IEEE_754    1
+
+#define BRW_FRONTWINDING_CW      0
+#define BRW_FRONTWINDING_CCW     1
+
+#define BRW_SPRITE_POINT_ENABLE  16
+
+#define BRW_INDEX_BYTE     0
+#define BRW_INDEX_WORD     1
+#define BRW_INDEX_DWORD    2
+
+#define BRW_LOGICOPFUNCTION_CLEAR            0
+#define BRW_LOGICOPFUNCTION_NOR              1
+#define BRW_LOGICOPFUNCTION_AND_INVERTED     2
+#define BRW_LOGICOPFUNCTION_COPY_INVERTED    3
+#define BRW_LOGICOPFUNCTION_AND_REVERSE      4
+#define BRW_LOGICOPFUNCTION_INVERT           5
+#define BRW_LOGICOPFUNCTION_XOR              6
+#define BRW_LOGICOPFUNCTION_NAND             7
+#define BRW_LOGICOPFUNCTION_AND              8
+#define BRW_LOGICOPFUNCTION_EQUIV            9
+#define BRW_LOGICOPFUNCTION_NOOP             10
+#define BRW_LOGICOPFUNCTION_OR_INVERTED      11
+#define BRW_LOGICOPFUNCTION_COPY             12
+#define BRW_LOGICOPFUNCTION_OR_REVERSE       13
+#define BRW_LOGICOPFUNCTION_OR               14
+#define BRW_LOGICOPFUNCTION_SET              15  
+
+#define BRW_MAPFILTER_NEAREST        0x0 
+#define BRW_MAPFILTER_LINEAR         0x1 
+#define BRW_MAPFILTER_ANISOTROPIC    0x2
+
+#define BRW_MIPFILTER_NONE        0   
+#define BRW_MIPFILTER_NEAREST     1   
+#define BRW_MIPFILTER_LINEAR      3
+
+#define BRW_ADDRESS_ROUNDING_ENABLE_U_MAG      0x20
+#define BRW_ADDRESS_ROUNDING_ENABLE_U_MIN      0x10
+#define BRW_ADDRESS_ROUNDING_ENABLE_V_MAG      0x08
+#define BRW_ADDRESS_ROUNDING_ENABLE_V_MIN      0x04
+#define BRW_ADDRESS_ROUNDING_ENABLE_R_MAG      0x02
+#define BRW_ADDRESS_ROUNDING_ENABLE_R_MIN      0x01
+
+#define BRW_POLYGON_FRONT_FACING     0
+#define BRW_POLYGON_BACK_FACING      1
+
+#define BRW_PREFILTER_ALWAYS     0x0 
+#define BRW_PREFILTER_NEVER      0x1
+#define BRW_PREFILTER_LESS       0x2
+#define BRW_PREFILTER_EQUAL      0x3
+#define BRW_PREFILTER_LEQUAL     0x4
+#define BRW_PREFILTER_GREATER    0x5
+#define BRW_PREFILTER_NOTEQUAL   0x6
+#define BRW_PREFILTER_GEQUAL     0x7
+
+#define BRW_PROVOKING_VERTEX_0    0
+#define BRW_PROVOKING_VERTEX_1    1 
+#define BRW_PROVOKING_VERTEX_2    2
+
+#define BRW_RASTRULE_UPPER_LEFT  0    
+#define BRW_RASTRULE_UPPER_RIGHT 1
+/* These are listed as "Reserved, but not seen as useful"
+ * in Intel documentation (page 212, "Point Rasterization Rule",
+ * section 7.4 "SF Pipeline State Summary", of document
+ * "Intel® 965 Express Chipset Family and Intel® G35 Express
+ * Chipset Graphics Controller Programmer's Reference Manual,
+ * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
+ * available at 
+ *     http://intellinuxgraphics.org/documentation.html
+ * at the time of this writing).
+ *
+ * These appear to be supported on at least some
+ * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT
+ * is useful when using OpenGL to render to a FBO
+ * (which has the pixel coordinate Y orientation inverted
+ * with respect to the normal OpenGL pixel coordinate system).
+ */
+#define BRW_RASTRULE_LOWER_LEFT  2
+#define BRW_RASTRULE_LOWER_RIGHT 3
+
+#define BRW_RENDERTARGET_CLAMPRANGE_UNORM    0
+#define BRW_RENDERTARGET_CLAMPRANGE_SNORM    1
+#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT   2
+
+#define BRW_STENCILOP_KEEP               0
+#define BRW_STENCILOP_ZERO               1
+#define BRW_STENCILOP_REPLACE            2
+#define BRW_STENCILOP_INCRSAT            3
+#define BRW_STENCILOP_DECRSAT            4
+#define BRW_STENCILOP_INCR               5
+#define BRW_STENCILOP_DECR               6
+#define BRW_STENCILOP_INVERT             7
+
+/* Surface state DW0 */
+#define BRW_SURFACE_RC_READ_WRITE      (1 << 8)
+#define BRW_SURFACE_MIPLAYOUT_SHIFT    10
+#define BRW_SURFACE_MIPMAPLAYOUT_BELOW   0
+#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT   1
+#define BRW_SURFACE_CUBEFACE_ENABLES   0x3f
+#define BRW_SURFACE_BLEND_ENABLED      (1 << 13)
+#define BRW_SURFACE_WRITEDISABLE_B_SHIFT       14
+#define BRW_SURFACE_WRITEDISABLE_G_SHIFT       15
+#define BRW_SURFACE_WRITEDISABLE_R_SHIFT       16
+#define BRW_SURFACE_WRITEDISABLE_A_SHIFT       17
+
+#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT             0x000 
+#define BRW_SURFACEFORMAT_R32G32B32A32_SINT              0x001 
+#define BRW_SURFACEFORMAT_R32G32B32A32_UINT              0x002 
+#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM             0x003 
+#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM             0x004 
+#define BRW_SURFACEFORMAT_R64G64_FLOAT                   0x005 
+#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT             0x006 
+#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED           0x007
+#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED           0x008
+#define BRW_SURFACEFORMAT_R32G32B32_FLOAT                0x040 
+#define BRW_SURFACEFORMAT_R32G32B32_SINT                 0x041 
+#define BRW_SURFACEFORMAT_R32G32B32_UINT                 0x042 
+#define BRW_SURFACEFORMAT_R32G32B32_UNORM                0x043 
+#define BRW_SURFACEFORMAT_R32G32B32_SNORM                0x044 
+#define BRW_SURFACEFORMAT_R32G32B32_SSCALED              0x045 
+#define BRW_SURFACEFORMAT_R32G32B32_USCALED              0x046 
+#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM             0x080 
+#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM             0x081 
+#define BRW_SURFACEFORMAT_R16G16B16A16_SINT              0x082 
+#define BRW_SURFACEFORMAT_R16G16B16A16_UINT              0x083 
+#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT             0x084 
+#define BRW_SURFACEFORMAT_R32G32_FLOAT                   0x085 
+#define BRW_SURFACEFORMAT_R32G32_SINT                    0x086 
+#define BRW_SURFACEFORMAT_R32G32_UINT                    0x087 
+#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS       0x088 
+#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT        0x089 
+#define BRW_SURFACEFORMAT_L32A32_FLOAT                   0x08A 
+#define BRW_SURFACEFORMAT_R32G32_UNORM                   0x08B 
+#define BRW_SURFACEFORMAT_R32G32_SNORM                   0x08C 
+#define BRW_SURFACEFORMAT_R64_FLOAT                      0x08D 
+#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM             0x08E 
+#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT             0x08F 
+#define BRW_SURFACEFORMAT_A32X32_FLOAT                   0x090 
+#define BRW_SURFACEFORMAT_L32X32_FLOAT                   0x091 
+#define BRW_SURFACEFORMAT_I32X32_FLOAT                   0x092 
+#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED           0x093
+#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED           0x094
+#define BRW_SURFACEFORMAT_R32G32_SSCALED                 0x095
+#define BRW_SURFACEFORMAT_R32G32_USCALED                 0x096
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM                 0x0C0 
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB            0x0C1 
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM              0x0C2 
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB         0x0C3 
+#define BRW_SURFACEFORMAT_R10G10B10A2_UINT               0x0C4 
+#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM       0x0C5 
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM                 0x0C7 
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB            0x0C8 
+#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM                 0x0C9 
+#define BRW_SURFACEFORMAT_R8G8B8A8_SINT                  0x0CA 
+#define BRW_SURFACEFORMAT_R8G8B8A8_UINT                  0x0CB 
+#define BRW_SURFACEFORMAT_R16G16_UNORM                   0x0CC 
+#define BRW_SURFACEFORMAT_R16G16_SNORM                   0x0CD 
+#define BRW_SURFACEFORMAT_R16G16_SINT                    0x0CE 
+#define BRW_SURFACEFORMAT_R16G16_UINT                    0x0CF 
+#define BRW_SURFACEFORMAT_R16G16_FLOAT                   0x0D0 
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM              0x0D1 
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB         0x0D2 
+#define BRW_SURFACEFORMAT_R11G11B10_FLOAT                0x0D3 
+#define BRW_SURFACEFORMAT_R32_SINT                       0x0D6 
+#define BRW_SURFACEFORMAT_R32_UINT                       0x0D7 
+#define BRW_SURFACEFORMAT_R32_FLOAT                      0x0D8 
+#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS          0x0D9 
+#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT           0x0DA 
+#define BRW_SURFACEFORMAT_L16A16_UNORM                   0x0DF 
+#define BRW_SURFACEFORMAT_I24X8_UNORM                    0x0E0 
+#define BRW_SURFACEFORMAT_L24X8_UNORM                    0x0E1 
+#define BRW_SURFACEFORMAT_A24X8_UNORM                    0x0E2 
+#define BRW_SURFACEFORMAT_I32_FLOAT                      0x0E3 
+#define BRW_SURFACEFORMAT_L32_FLOAT                      0x0E4 
+#define BRW_SURFACEFORMAT_A32_FLOAT                      0x0E5 
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM                 0x0E9 
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB            0x0EA 
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM                 0x0EB 
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB            0x0EC 
+#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP             0x0ED 
+#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM              0x0EE 
+#define BRW_SURFACEFORMAT_L16A16_FLOAT                   0x0F0 
+#define BRW_SURFACEFORMAT_R32_UNORM                      0x0F1 
+#define BRW_SURFACEFORMAT_R32_SNORM                      0x0F2 
+#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED            0x0F3
+#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED               0x0F4
+#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED               0x0F5
+#define BRW_SURFACEFORMAT_R16G16_SSCALED                 0x0F6
+#define BRW_SURFACEFORMAT_R16G16_USCALED                 0x0F7
+#define BRW_SURFACEFORMAT_R32_SSCALED                    0x0F8
+#define BRW_SURFACEFORMAT_R32_USCALED                    0x0F9
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM                   0x100 
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB              0x101 
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM                 0x102 
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB            0x103 
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM                 0x104 
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB            0x105 
+#define BRW_SURFACEFORMAT_R8G8_UNORM                     0x106 
+#define BRW_SURFACEFORMAT_R8G8_SNORM                     0x107 
+#define BRW_SURFACEFORMAT_R8G8_SINT                      0x108 
+#define BRW_SURFACEFORMAT_R8G8_UINT                      0x109 
+#define BRW_SURFACEFORMAT_R16_UNORM                      0x10A 
+#define BRW_SURFACEFORMAT_R16_SNORM                      0x10B 
+#define BRW_SURFACEFORMAT_R16_SINT                       0x10C 
+#define BRW_SURFACEFORMAT_R16_UINT                       0x10D 
+#define BRW_SURFACEFORMAT_R16_FLOAT                      0x10E 
+#define BRW_SURFACEFORMAT_I16_UNORM                      0x111 
+#define BRW_SURFACEFORMAT_L16_UNORM                      0x112 
+#define BRW_SURFACEFORMAT_A16_UNORM                      0x113 
+#define BRW_SURFACEFORMAT_L8A8_UNORM                     0x114 
+#define BRW_SURFACEFORMAT_I16_FLOAT                      0x115
+#define BRW_SURFACEFORMAT_L16_FLOAT                      0x116
+#define BRW_SURFACEFORMAT_A16_FLOAT                      0x117
+#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB                0x118
+#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM            0x119
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM                 0x11A
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB            0x11B
+#define BRW_SURFACEFORMAT_R8G8_SSCALED                   0x11C
+#define BRW_SURFACEFORMAT_R8G8_USCALED                   0x11D
+#define BRW_SURFACEFORMAT_R16_SSCALED                    0x11E
+#define BRW_SURFACEFORMAT_R16_USCALED                    0x11F
+#define BRW_SURFACEFORMAT_R8_UNORM                       0x140 
+#define BRW_SURFACEFORMAT_R8_SNORM                       0x141 
+#define BRW_SURFACEFORMAT_R8_SINT                        0x142 
+#define BRW_SURFACEFORMAT_R8_UINT                        0x143 
+#define BRW_SURFACEFORMAT_A8_UNORM                       0x144 
+#define BRW_SURFACEFORMAT_I8_UNORM                       0x145 
+#define BRW_SURFACEFORMAT_L8_UNORM                       0x146 
+#define BRW_SURFACEFORMAT_P4A4_UNORM                     0x147 
+#define BRW_SURFACEFORMAT_A4P4_UNORM                     0x148
+#define BRW_SURFACEFORMAT_R8_SSCALED                     0x149
+#define BRW_SURFACEFORMAT_R8_USCALED                     0x14A
+#define BRW_SURFACEFORMAT_L8_UNORM_SRGB                  0x14C
+#define BRW_SURFACEFORMAT_DXT1_RGB_SRGB                  0x180
+#define BRW_SURFACEFORMAT_R1_UINT                        0x181 
+#define BRW_SURFACEFORMAT_YCRCB_NORMAL                   0x182 
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY                  0x183 
+#define BRW_SURFACEFORMAT_BC1_UNORM                      0x186 
+#define BRW_SURFACEFORMAT_BC2_UNORM                      0x187 
+#define BRW_SURFACEFORMAT_BC3_UNORM                      0x188 
+#define BRW_SURFACEFORMAT_BC4_UNORM                      0x189 
+#define BRW_SURFACEFORMAT_BC5_UNORM                      0x18A 
+#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB                 0x18B 
+#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB                 0x18C 
+#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB                 0x18D 
+#define BRW_SURFACEFORMAT_MONO8                          0x18E 
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUV                   0x18F 
+#define BRW_SURFACEFORMAT_YCRCB_SWAPY                    0x190 
+#define BRW_SURFACEFORMAT_DXT1_RGB                       0x191 
+#define BRW_SURFACEFORMAT_FXT1                           0x192 
+#define BRW_SURFACEFORMAT_R8G8B8_UNORM                   0x193 
+#define BRW_SURFACEFORMAT_R8G8B8_SNORM                   0x194 
+#define BRW_SURFACEFORMAT_R8G8B8_SSCALED                 0x195 
+#define BRW_SURFACEFORMAT_R8G8B8_USCALED                 0x196 
+#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT             0x197 
+#define BRW_SURFACEFORMAT_R64G64B64_FLOAT                0x198 
+#define BRW_SURFACEFORMAT_BC4_SNORM                      0x199 
+#define BRW_SURFACEFORMAT_BC5_SNORM                      0x19A 
+#define BRW_SURFACEFORMAT_R16G16B16_UNORM                0x19C 
+#define BRW_SURFACEFORMAT_R16G16B16_SNORM                0x19D 
+#define BRW_SURFACEFORMAT_R16G16B16_SSCALED              0x19E 
+#define BRW_SURFACEFORMAT_R16G16B16_USCALED              0x19F
+#define BRW_SURFACE_FORMAT_SHIFT       18
+#define BRW_SURFACE_FORMAT_MASK                INTEL_MASK(26, 18)
+
+#define BRW_SURFACERETURNFORMAT_FLOAT32  0
+#define BRW_SURFACERETURNFORMAT_S1       1
+
+#define BRW_SURFACE_TYPE_SHIFT         29
+#define BRW_SURFACE_TYPE_MASK          INTEL_MASK(31, 29)
+#define BRW_SURFACE_1D      0
+#define BRW_SURFACE_2D      1
+#define BRW_SURFACE_3D      2
+#define BRW_SURFACE_CUBE    3
+#define BRW_SURFACE_BUFFER  4
+#define BRW_SURFACE_NULL    7
+
+/* Surface state DW2 */
+#define BRW_SURFACE_HEIGHT_SHIFT       19
+#define BRW_SURFACE_HEIGHT_MASK                INTEL_MASK(31, 19)
+#define BRW_SURFACE_WIDTH_SHIFT                6
+#define BRW_SURFACE_WIDTH_MASK         INTEL_MASK(18, 6)
+#define BRW_SURFACE_LOD_SHIFT          2
+#define BRW_SURFACE_LOD_MASK           INTEL_MASK(5, 2)
+
+/* Surface state DW3 */
+#define BRW_SURFACE_DEPTH_SHIFT                21
+#define BRW_SURFACE_DEPTH_MASK         INTEL_MASK(31, 21)
+#define BRW_SURFACE_PITCH_SHIFT                3
+#define BRW_SURFACE_PITCH_MASK         INTEL_MASK(19, 3)
+#define BRW_SURFACE_TILED              (1 << 1)
+#define BRW_SURFACE_TILED_Y            (1 << 0)
+
+/* Surface state DW4 */
+#define BRW_SURFACE_MIN_LOD_SHIFT      28
+#define BRW_SURFACE_MIN_LOD_MASK       INTEL_MASK(31, 28)
+
+/* Surface state DW5 */
+#define BRW_SURFACE_X_OFFSET_SHIFT             25
+#define BRW_SURFACE_X_OFFSET_MASK              INTEL_MASK(31, 25)
+#define BRW_SURFACE_VERTICAL_ALIGN_ENABLE      (1 << 24)
+#define BRW_SURFACE_Y_OFFSET_SHIFT             20
+#define BRW_SURFACE_Y_OFFSET_MASK              INTEL_MASK(23, 20)
+
+#define BRW_TEXCOORDMODE_WRAP            0
+#define BRW_TEXCOORDMODE_MIRROR          1
+#define BRW_TEXCOORDMODE_CLAMP           2
+#define BRW_TEXCOORDMODE_CUBE            3
+#define BRW_TEXCOORDMODE_CLAMP_BORDER    4
+#define BRW_TEXCOORDMODE_MIRROR_ONCE     5
+
+#define BRW_THREAD_PRIORITY_NORMAL   0
+#define BRW_THREAD_PRIORITY_HIGH     1
+
+#define BRW_TILEWALK_XMAJOR                 0
+#define BRW_TILEWALK_YMAJOR                 1
+
+#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS  0
+#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS  1
+
+/* Execution Unit (EU) defines
+ */
+
+#define BRW_ALIGN_1   0
+#define BRW_ALIGN_16  1
+
+#define BRW_ADDRESS_DIRECT                        0
+#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER    1
+
+#define BRW_CHANNEL_X     0
+#define BRW_CHANNEL_Y     1
+#define BRW_CHANNEL_Z     2
+#define BRW_CHANNEL_W     3
+
+enum brw_compression {   /* compression selector; every enumerator is numbered explicitly (0..2) */
+   BRW_COMPRESSION_NONE       = 0,
+   BRW_COMPRESSION_2NDHALF    = 1,   /* NOTE(review): presumably selects the second half of a compressed pair - confirm */
+   BRW_COMPRESSION_COMPRESSED = 2,   /* trailing comma after the last enumerator needs C99/C++11 or a compiler extension */
+};
+
+#define GEN6_COMPRESSION_1Q            0   /* 1Q..4Q are numbered 0..3; NOTE(review): presumably quarter selectors - confirm */
+#define GEN6_COMPRESSION_2Q            1
+#define GEN6_COMPRESSION_3Q            2
+#define GEN6_COMPRESSION_4Q            3
+#define GEN6_COMPRESSION_1H            0   /* same value as GEN6_COMPRESSION_1Q */
+#define GEN6_COMPRESSION_2H            2   /* same value as GEN6_COMPRESSION_3Q */
+
+#define BRW_CONDITIONAL_NONE  0
+#define BRW_CONDITIONAL_Z     1
+#define BRW_CONDITIONAL_NZ    2
+#define BRW_CONDITIONAL_EQ    1        /* Z */
+#define BRW_CONDITIONAL_NEQ   2        /* NZ */
+#define BRW_CONDITIONAL_G     3
+#define BRW_CONDITIONAL_GE    4
+#define BRW_CONDITIONAL_L     5
+#define BRW_CONDITIONAL_LE    6
+#define BRW_CONDITIONAL_R     7
+#define BRW_CONDITIONAL_O     8
+#define BRW_CONDITIONAL_U     9
+
+#define BRW_DEBUG_NONE        0
+#define BRW_DEBUG_BREAKPOINT  1
+
+#define BRW_DEPENDENCY_NORMAL         0
+#define BRW_DEPENDENCY_NOTCLEARED     1
+#define BRW_DEPENDENCY_NOTCHECKED     2
+#define BRW_DEPENDENCY_DISABLE        3
+
+#define BRW_EXECUTE_1     0
+#define BRW_EXECUTE_2     1
+#define BRW_EXECUTE_4     2
+#define BRW_EXECUTE_8     3
+#define BRW_EXECUTE_16    4
+#define BRW_EXECUTE_32    5
+
+#define BRW_HORIZONTAL_STRIDE_0   0
+#define BRW_HORIZONTAL_STRIDE_1   1
+#define BRW_HORIZONTAL_STRIDE_2   2
+#define BRW_HORIZONTAL_STRIDE_4   3
+
+#define BRW_INSTRUCTION_NORMAL    0
+#define BRW_INSTRUCTION_SATURATE  1
+
+#define BRW_MASK_ENABLE   0
+#define BRW_MASK_DISABLE  1
+
+/** @{
+ *
+ * Gen6 has replaced "mask enable/disable" with WECtrl, which is
+ * effectively the same but much simpler to think about.  Now, two
+ * contributors are ANDed together to determine whether a channel is
+ * executed: the predication on the instruction, and the channel write
+ * enable.
+ */
+/**
+ * This is the default value.  It means that a channel's write enable is set
+ * if the per-channel IP is pointing at this instruction.
+ */
+#define BRW_WE_NORMAL          0
+/**
+ * This is used like BRW_MASK_DISABLE, and causes all channels to have
+ * their write enable set.  Note that predication still contributes to
+ * whether the channel actually gets written.
+ */
+#define BRW_WE_ALL             1
+/** @} */
+
+enum opcode {
+   /* These are the actual hardware opcodes.  Values are explicit and not contiguous (e.g. 3, 13-15 and 18-31 are unused here). */
+   BRW_OPCODE_MOV =    1,
+   BRW_OPCODE_SEL =    2,
+   BRW_OPCODE_NOT =    4,
+   BRW_OPCODE_AND =    5,
+   BRW_OPCODE_OR =     6,
+   BRW_OPCODE_XOR =    7,
+   BRW_OPCODE_SHR =    8,
+   BRW_OPCODE_SHL =    9,
+   BRW_OPCODE_RSR =    10,
+   BRW_OPCODE_RSL =    11,
+   BRW_OPCODE_ASR =    12,
+   BRW_OPCODE_CMP =    16,
+   BRW_OPCODE_CMPN =   17,
+   BRW_OPCODE_JMPI =   32,
+   BRW_OPCODE_IF =     34,
+   BRW_OPCODE_IFF =    35,
+   BRW_OPCODE_ELSE =   36,
+   BRW_OPCODE_ENDIF =  37,
+   BRW_OPCODE_DO =     38,
+   BRW_OPCODE_WHILE =  39,
+   BRW_OPCODE_BREAK =  40,
+   BRW_OPCODE_CONTINUE = 41,
+   BRW_OPCODE_HALT =   42,
+   BRW_OPCODE_MSAVE =  44,
+   BRW_OPCODE_MRESTORE = 45,
+   BRW_OPCODE_PUSH =   46,
+   BRW_OPCODE_POP =    47,
+   BRW_OPCODE_WAIT =   48,
+   BRW_OPCODE_SEND =   49,
+   BRW_OPCODE_SENDC =  50,
+   BRW_OPCODE_MATH =   56,
+   BRW_OPCODE_ADD =    64,
+   BRW_OPCODE_MUL =    65,
+   BRW_OPCODE_AVG =    66,
+   BRW_OPCODE_FRC =    67,
+   BRW_OPCODE_RNDU =   68,
+   BRW_OPCODE_RNDD =   69,
+   BRW_OPCODE_RNDE =   70,
+   BRW_OPCODE_RNDZ =   71,
+   BRW_OPCODE_MAC =    72,
+   BRW_OPCODE_MACH =   73,
+   BRW_OPCODE_LZD =    74,
+   BRW_OPCODE_SAD2 =   80,
+   BRW_OPCODE_SADA2 =  81,
+   BRW_OPCODE_DP4 =    84,
+   BRW_OPCODE_DPH =    85,
+   BRW_OPCODE_DP3 =    86,
+   BRW_OPCODE_DP2 =    87,
+   BRW_OPCODE_DPA2 =   88,
+   BRW_OPCODE_LINE =   89,
+   BRW_OPCODE_PLN =    90,
+   BRW_OPCODE_MAD =    91,
+   BRW_OPCODE_NOP =    126,   /* largest hardware opcode value in this list */
+
+   /* These are compiler backend opcodes that get translated into other
+    * instructions.  Only the first is numbered (128); the rest follow on.
+    */
+   FS_OPCODE_FB_WRITE = 128,   /* placed above BRW_OPCODE_NOP (126) so it cannot collide with a hardware opcode above */
+   SHADER_OPCODE_RCP,
+   SHADER_OPCODE_RSQ,
+   SHADER_OPCODE_SQRT,
+   SHADER_OPCODE_EXP2,
+   SHADER_OPCODE_LOG2,
+   SHADER_OPCODE_POW,
+   SHADER_OPCODE_INT_QUOTIENT,
+   SHADER_OPCODE_INT_REMAINDER,
+   SHADER_OPCODE_SIN,
+   SHADER_OPCODE_COS,
+
+   SHADER_OPCODE_TEX,
+   SHADER_OPCODE_TXD,
+   SHADER_OPCODE_TXF,
+   SHADER_OPCODE_TXL,
+   SHADER_OPCODE_TXS,
+   FS_OPCODE_TXB,
+
+   FS_OPCODE_DDX,
+   FS_OPCODE_DDY,
+   FS_OPCODE_PIXEL_X,
+   FS_OPCODE_PIXEL_Y,
+   FS_OPCODE_CINTERP,
+   FS_OPCODE_LINTERP,
+   FS_OPCODE_DISCARD,
+   FS_OPCODE_SPILL,
+   FS_OPCODE_UNSPILL,
+   FS_OPCODE_PULL_CONSTANT_LOAD,
+
+   VS_OPCODE_URB_WRITE,
+   VS_OPCODE_SCRATCH_READ,
+   VS_OPCODE_SCRATCH_WRITE,
+   VS_OPCODE_PULL_CONSTANT_LOAD,   /* trailing comma after the last enumerator needs C99/C++11 or a compiler extension */
+};
+
+#define BRW_PREDICATE_NONE             0
+#define BRW_PREDICATE_NORMAL           1
+#define BRW_PREDICATE_ALIGN1_ANYV             2   /* ALIGN1 names occupy values 2..11 */
+#define BRW_PREDICATE_ALIGN1_ALLV             3
+#define BRW_PREDICATE_ALIGN1_ANY2H            4
+#define BRW_PREDICATE_ALIGN1_ALL2H            5
+#define BRW_PREDICATE_ALIGN1_ANY4H            6
+#define BRW_PREDICATE_ALIGN1_ALL4H            7
+#define BRW_PREDICATE_ALIGN1_ANY8H            8
+#define BRW_PREDICATE_ALIGN1_ALL8H            9
+#define BRW_PREDICATE_ALIGN1_ANY16H           10
+#define BRW_PREDICATE_ALIGN1_ALL16H           11
+#define BRW_PREDICATE_ALIGN16_REPLICATE_X     2   /* ALIGN16 names reuse values 2..7 of the ALIGN1 names above */
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Y     3
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Z     4
+#define BRW_PREDICATE_ALIGN16_REPLICATE_W     5
+#define BRW_PREDICATE_ALIGN16_ANY4H           6   /* same value as BRW_PREDICATE_ALIGN1_ANY4H */
+#define BRW_PREDICATE_ALIGN16_ALL4H           7   /* same value as BRW_PREDICATE_ALIGN1_ALL4H */
+
+#define BRW_ARCHITECTURE_REGISTER_FILE    0
+#define BRW_GENERAL_REGISTER_FILE         1
+#define BRW_MESSAGE_REGISTER_FILE         2
+#define BRW_IMMEDIATE_VALUE               3
+
+#define BRW_REGISTER_TYPE_UD  0
+#define BRW_REGISTER_TYPE_D   1
+#define BRW_REGISTER_TYPE_UW  2
+#define BRW_REGISTER_TYPE_W   3
+#define BRW_REGISTER_TYPE_UB  4
+#define BRW_REGISTER_TYPE_B   5        /* shares the value 5 with BRW_REGISTER_TYPE_VF */
+#define BRW_REGISTER_TYPE_VF  5        /* packed float vector, immediates only? */
+#define BRW_REGISTER_TYPE_HF  6        /* shares the value 6 with BRW_REGISTER_TYPE_V */
+#define BRW_REGISTER_TYPE_V   6        /* packed int vector, immediates only, uword dest only */
+#define BRW_REGISTER_TYPE_F   7
+
+#define BRW_ARF_NULL                  0x00
+#define BRW_ARF_ADDRESS               0x10
+#define BRW_ARF_ACCUMULATOR           0x20   
+#define BRW_ARF_FLAG                  0x30
+#define BRW_ARF_MASK                  0x40
+#define BRW_ARF_MASK_STACK            0x50
+#define BRW_ARF_MASK_STACK_DEPTH      0x60
+#define BRW_ARF_STATE                 0x70
+#define BRW_ARF_CONTROL               0x80
+#define BRW_ARF_NOTIFICATION_COUNT    0x90
+#define BRW_ARF_IP                    0xA0
+
+#define BRW_MRF_COMPR4                 (1 << 7)
+
+#define BRW_AMASK   0
+#define BRW_IMASK   1
+#define BRW_LMASK   2
+#define BRW_CMASK   3
+
+
+
+#define BRW_THREAD_NORMAL     0
+#define BRW_THREAD_ATOMIC     1
+#define BRW_THREAD_SWITCH     2
+
+#define BRW_VERTICAL_STRIDE_0                 0
+#define BRW_VERTICAL_STRIDE_1                 1
+#define BRW_VERTICAL_STRIDE_2                 2
+#define BRW_VERTICAL_STRIDE_4                 3
+#define BRW_VERTICAL_STRIDE_8                 4
+#define BRW_VERTICAL_STRIDE_16                5
+#define BRW_VERTICAL_STRIDE_32                6
+#define BRW_VERTICAL_STRIDE_64                7
+#define BRW_VERTICAL_STRIDE_128               8
+#define BRW_VERTICAL_STRIDE_256               9
+#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL   0xF
+
+#define BRW_WIDTH_1       0
+#define BRW_WIDTH_2       1
+#define BRW_WIDTH_4       2
+#define BRW_WIDTH_8       3
+#define BRW_WIDTH_16      4
+
+#define BRW_STATELESS_BUFFER_BOUNDARY_1K      0
+#define BRW_STATELESS_BUFFER_BOUNDARY_2K      1
+#define BRW_STATELESS_BUFFER_BOUNDARY_4K      2
+#define BRW_STATELESS_BUFFER_BOUNDARY_8K      3
+#define BRW_STATELESS_BUFFER_BOUNDARY_16K     4
+#define BRW_STATELESS_BUFFER_BOUNDARY_32K     5
+#define BRW_STATELESS_BUFFER_BOUNDARY_64K     6
+#define BRW_STATELESS_BUFFER_BOUNDARY_128K    7
+#define BRW_STATELESS_BUFFER_BOUNDARY_256K    8
+#define BRW_STATELESS_BUFFER_BOUNDARY_512K    9
+#define BRW_STATELESS_BUFFER_BOUNDARY_1M      10
+#define BRW_STATELESS_BUFFER_BOUNDARY_2M      11
+
+#define BRW_POLYGON_FACING_FRONT      0
+#define BRW_POLYGON_FACING_BACK       1
+
+/**
+ * Message target: Shared Function ID for where to SEND a message.
+ *
+ * These are enumerated in the ISA reference under "send - Send Message".
+ * In particular, see the following tables:
+ * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition"
+ * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor"
+ * - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) /
+ *   Overview / GPE Function IDs
+ */
+enum brw_message_target {
+   BRW_SFID_NULL                     = 0,
+   BRW_SFID_MATH                     = 1, /* Only valid on Gen4-5 */
+   BRW_SFID_SAMPLER                  = 2,
+   BRW_SFID_MESSAGE_GATEWAY          = 3,
+   BRW_SFID_DATAPORT_READ            = 4,
+   BRW_SFID_DATAPORT_WRITE           = 5,
+   BRW_SFID_URB                      = 6,
+   BRW_SFID_THREAD_SPAWNER           = 7,
+
+   GEN6_SFID_DATAPORT_SAMPLER_CACHE  = 4, /* same value as BRW_SFID_DATAPORT_READ */
+   GEN6_SFID_DATAPORT_RENDER_CACHE   = 5, /* same value as BRW_SFID_DATAPORT_WRITE */
+   GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,
+
+   GEN7_SFID_DATAPORT_DATA_CACHE     = 10, /* same value as the macro GEN7_MESSAGE_TARGET_DP_DATA_CACHE */
+};
+
+#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE     10
+
+#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32     0
+#define BRW_SAMPLER_RETURN_FORMAT_UINT32      2
+#define BRW_SAMPLER_RETURN_FORMAT_SINT32      3
+
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE              0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE             0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS        0
+#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX             1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD        1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD         1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS  2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS    2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE    0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE     2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE 1
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE  1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
+#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD                3
+#define BRW_SAMPLER_MESSAGE_SIMD8_LD                  3
+#define BRW_SAMPLER_MESSAGE_SIMD16_LD                 3
+
+#define GEN5_SAMPLER_MESSAGE_SAMPLE              0
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS         1
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD          2
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE      3
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS       4
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE  6
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD           7
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO      10
+
+/* for GEN5 only */
+#define BRW_SAMPLER_SIMD_MODE_SIMD4X2                   0
+#define BRW_SAMPLER_SIMD_MODE_SIMD8                     1
+#define BRW_SAMPLER_SIMD_MODE_SIMD16                    2
+#define BRW_SAMPLER_SIMD_MODE_SIMD32_64                 3
+
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
+#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     2
+#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS     3
+#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS     4
+
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD     0
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS    2
+
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS   2
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS  3
+
+/* This one stays the same across generations. */
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ          0
+/* GEN4 */
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     1
+#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          2
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      3
+/* G45, GEN5 */
+#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ        1
+#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     2
+#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ     3
+#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          4
+#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      6
+/* GEN6 */
+#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ       1
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     2
+#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          4
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ  5
+#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      6
+
+#define BRW_DATAPORT_READ_TARGET_DATA_CACHE      0
+#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE    1
+#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE   2
+
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE                0
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED     1
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01         2
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23         3
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01       4
+
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE                0
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE           1
+#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE                2
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE            3
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE              4
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE     5
+#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE               7
+
+/* GEN6 */
+#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE              7
+#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE               8
+#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE          9
+#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE               10
+#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE           11
+#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE             12
+#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE               13
+#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE       14
+
+/* GEN7 */
+#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE          10
+
+#define BRW_MATH_FUNCTION_INV                              1
+#define BRW_MATH_FUNCTION_LOG                              2
+#define BRW_MATH_FUNCTION_EXP                              3
+#define BRW_MATH_FUNCTION_SQRT                             4
+#define BRW_MATH_FUNCTION_RSQ                              5
+#define BRW_MATH_FUNCTION_SIN                              6 /* was 7 */
+#define BRW_MATH_FUNCTION_COS                              7 /* was 8 */
+#define BRW_MATH_FUNCTION_SINCOS                           8 /* was 6 */
+#define BRW_MATH_FUNCTION_TAN                              9 /* gen4 */
+#define BRW_MATH_FUNCTION_FDIV                             9 /* gen6+ */
+#define BRW_MATH_FUNCTION_POW                              10
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER   11
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT                 12
+#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER                13
+
+#define BRW_MATH_INTEGER_UNSIGNED     0
+#define BRW_MATH_INTEGER_SIGNED       1
+
+#define BRW_MATH_PRECISION_FULL        0
+#define BRW_MATH_PRECISION_PARTIAL     1
+
+#define BRW_MATH_SATURATE_NONE         0
+#define BRW_MATH_SATURATE_SATURATE     1
+
+#define BRW_MATH_DATA_VECTOR  0
+#define BRW_MATH_DATA_SCALAR  1
+
+#define BRW_URB_OPCODE_WRITE  0
+
+#define BRW_URB_SWIZZLE_NONE          0
+#define BRW_URB_SWIZZLE_INTERLEAVE    1
+#define BRW_URB_SWIZZLE_TRANSPOSE     2
+
+#define BRW_SCRATCH_SPACE_SIZE_1K     0
+#define BRW_SCRATCH_SPACE_SIZE_2K     1
+#define BRW_SCRATCH_SPACE_SIZE_4K     2
+#define BRW_SCRATCH_SPACE_SIZE_8K     3
+#define BRW_SCRATCH_SPACE_SIZE_16K    4
+#define BRW_SCRATCH_SPACE_SIZE_32K    5
+#define BRW_SCRATCH_SPACE_SIZE_64K    6
+#define BRW_SCRATCH_SPACE_SIZE_128K   7
+#define BRW_SCRATCH_SPACE_SIZE_256K   8
+#define BRW_SCRATCH_SPACE_SIZE_512K   9
+#define BRW_SCRATCH_SPACE_SIZE_1M     10
+#define BRW_SCRATCH_SPACE_SIZE_2M     11
+
+
+
+
+#define CMD_URB_FENCE                 0x6000
+#define CMD_CS_URB_STATE              0x6001
+#define CMD_CONST_BUFFER              0x6002
+
+#define CMD_STATE_BASE_ADDRESS        0x6101
+#define CMD_STATE_SIP                 0x6102
+#define CMD_PIPELINE_SELECT_965       0x6104
+#define CMD_PIPELINE_SELECT_GM45      0x6904
+
+#define _3DSTATE_PIPELINED_POINTERS            0x7800
+#define _3DSTATE_BINDING_TABLE_POINTERS                0x7801
+# define GEN6_BINDING_TABLE_MODIFY_VS  (1 << 8)
+# define GEN6_BINDING_TABLE_MODIFY_GS  (1 << 9)
+# define GEN6_BINDING_TABLE_MODIFY_PS  (1 << 12)
+
+#define _3DSTATE_BINDING_TABLE_POINTERS_VS     0x7826 /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POINTERS_HS     0x7827 /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POINTERS_DS     0x7828 /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POINTERS_GS     0x7829 /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POINTERS_PS     0x782A /* GEN7+ */
+
+#define _3DSTATE_SAMPLER_STATE_POINTERS                0x7802 /* GEN6+ */
+# define PS_SAMPLER_STATE_CHANGE                               (1 << 12)
+# define GS_SAMPLER_STATE_CHANGE                               (1 << 9)
+# define VS_SAMPLER_STATE_CHANGE                               (1 << 8)
+/* DW1: VS */
+/* DW2: GS */
+/* DW3: PS */
+
+#define _3DSTATE_SAMPLER_STATE_POINTERS_VS     0x782B /* GEN7+ */
+#define _3DSTATE_SAMPLER_STATE_POINTERS_GS     0x782E /* GEN7+ */
+#define _3DSTATE_SAMPLER_STATE_POINTERS_PS     0x782F /* GEN7+ */
+
+#define _3DSTATE_VERTEX_BUFFERS       0x7808
+# define BRW_VB0_INDEX_SHIFT           27
+# define GEN6_VB0_INDEX_SHIFT          26
+# define BRW_VB0_ACCESS_VERTEXDATA     (0 << 26)
+# define BRW_VB0_ACCESS_INSTANCEDATA   (1 << 26)
+# define GEN6_VB0_ACCESS_VERTEXDATA    (0 << 20)
+# define GEN6_VB0_ACCESS_INSTANCEDATA  (1 << 20)
+# define GEN7_VB0_ADDRESS_MODIFYENABLE  (1 << 14)
+# define BRW_VB0_PITCH_SHIFT           0
+
+#define _3DSTATE_VERTEX_ELEMENTS      0x7809
+# define BRW_VE0_INDEX_SHIFT           27
+# define GEN6_VE0_INDEX_SHIFT          26
+# define BRW_VE0_FORMAT_SHIFT          16
+# define BRW_VE0_VALID                 (1 << 26)
+# define GEN6_VE0_VALID                        (1 << 25)
+# define BRW_VE0_SRC_OFFSET_SHIFT      0
+# define BRW_VE1_COMPONENT_NOSTORE     0
+# define BRW_VE1_COMPONENT_STORE_SRC   1
+# define BRW_VE1_COMPONENT_STORE_0     2
+# define BRW_VE1_COMPONENT_STORE_1_FLT 3
+# define BRW_VE1_COMPONENT_STORE_1_INT 4
+# define BRW_VE1_COMPONENT_STORE_VID   5
+# define BRW_VE1_COMPONENT_STORE_IID   6
+# define BRW_VE1_COMPONENT_STORE_PID   7
+# define BRW_VE1_COMPONENT_0_SHIFT     28
+# define BRW_VE1_COMPONENT_1_SHIFT     24
+# define BRW_VE1_COMPONENT_2_SHIFT     20
+# define BRW_VE1_COMPONENT_3_SHIFT     16
+# define BRW_VE1_DST_OFFSET_SHIFT      0
+
+#define CMD_INDEX_BUFFER              0x780a
+#define GEN4_3DSTATE_VF_STATISTICS             0x780b
+#define GM45_3DSTATE_VF_STATISTICS             0x680b
+#define _3DSTATE_CC_STATE_POINTERS             0x780e /* GEN6+ */
+#define _3DSTATE_BLEND_STATE_POINTERS          0x7824 /* GEN7+ */
+#define _3DSTATE_DEPTH_STENCIL_STATE_POINTERS  0x7825 /* GEN7+ */
+
+#define _3DSTATE_URB                           0x7805 /* GEN6 */
+# define GEN6_URB_VS_SIZE_SHIFT                                16
+# define GEN6_URB_VS_ENTRIES_SHIFT                     0
+# define GEN6_URB_GS_ENTRIES_SHIFT                     8
+# define GEN6_URB_GS_SIZE_SHIFT                                0
+
+#define _3DSTATE_URB_VS                         0x7830 /* GEN7+ */
+#define _3DSTATE_URB_HS                         0x7831 /* GEN7+ */
+#define _3DSTATE_URB_DS                         0x7832 /* GEN7+ */
+#define _3DSTATE_URB_GS                         0x7833 /* GEN7+ */
+# define GEN7_URB_ENTRY_SIZE_SHIFT                      16
+# define GEN7_URB_STARTING_ADDRESS_SHIFT                25
+
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS         0x7912 /* GEN7+ */
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS         0x7916 /* GEN7+ */
+# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT         16
+
+#define _3DSTATE_VIEWPORT_STATE_POINTERS       0x780d /* GEN6+ */
+# define GEN6_CC_VIEWPORT_MODIFY                       (1 << 12)
+# define GEN6_SF_VIEWPORT_MODIFY                       (1 << 11)
+# define GEN6_CLIP_VIEWPORT_MODIFY                     (1 << 10)
+
+#define _3DSTATE_VIEWPORT_STATE_POINTERS_CC    0x7823 /* GEN7+ */
+#define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL 0x7821 /* GEN7+ */
+
+#define _3DSTATE_SCISSOR_STATE_POINTERS                0x780f /* GEN6+ */
+
+#define _3DSTATE_VS                            0x7810 /* GEN6+; field macros below are grouped by command dword (DWn) */
+/* DW2 */
+# define GEN6_VS_SPF_MODE                              (1u << 31) /* unsigned: 1 << 31 overflows a 32-bit signed int */
+# define GEN6_VS_VECTOR_MASK_ENABLE                    (1 << 30)
+# define GEN6_VS_SAMPLER_COUNT_SHIFT                   27
+# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT       18
+# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754          (0 << 16)
+# define GEN6_VS_FLOATING_POINT_MODE_ALT               (1 << 16)
+/* DW4 */
+# define GEN6_VS_DISPATCH_START_GRF_SHIFT              20
+# define GEN6_VS_URB_READ_LENGTH_SHIFT                 11
+# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT           4
+/* DW5 */
+# define GEN6_VS_MAX_THREADS_SHIFT                     25
+# define GEN6_VS_STATISTICS_ENABLE                     (1 << 10)
+# define GEN6_VS_CACHE_DISABLE                         (1 << 1)
+# define GEN6_VS_ENABLE                                        (1 << 0)
+
+#define _3DSTATE_GS                            0x7811 /* GEN6+; field macros below are grouped by command dword (DWn) */
+/* DW2 */
+# define GEN6_GS_SPF_MODE                              (1u << 31) /* unsigned: 1 << 31 overflows a 32-bit signed int */
+# define GEN6_GS_VECTOR_MASK_ENABLE                    (1 << 30)
+# define GEN6_GS_SAMPLER_COUNT_SHIFT                   27
+# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT       18
+# define GEN6_GS_FLOATING_POINT_MODE_IEEE_754          (0 << 16)
+# define GEN6_GS_FLOATING_POINT_MODE_ALT               (1 << 16)
+/* DW4 */
+# define GEN6_GS_URB_READ_LENGTH_SHIFT                 11
+# define GEN7_GS_INCLUDE_VERTEX_HANDLES                        (1 << 10)
+# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT           4
+# define GEN6_GS_DISPATCH_START_GRF_SHIFT              0
+/* DW5 */
+# define GEN6_GS_MAX_THREADS_SHIFT                     25
+# define GEN6_GS_STATISTICS_ENABLE                     (1 << 10)
+# define GEN6_GS_SO_STATISTICS_ENABLE                  (1 << 9)
+# define GEN6_GS_RENDERING_ENABLE                      (1 << 8)
+# define GEN7_GS_ENABLE                                        (1 << 0) /* a different bit from GEN6_GS_ENABLE (bit 15, listed under DW6) */
+/* DW6 */
+# define GEN6_GS_REORDER                               (1 << 30)
+# define GEN6_GS_DISCARD_ADJACENCY                     (1 << 29)
+# define GEN6_GS_SVBI_PAYLOAD_ENABLE                   (1 << 28)
+# define GEN6_GS_SVBI_POSTINCREMENT_ENABLE             (1 << 27)
+# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT                16
+# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK         INTEL_MASK(25, 16)
+# define GEN6_GS_ENABLE                                        (1 << 15)
+
+# define BRW_GS_EDGE_INDICATOR_0                       (1 << 8)
+# define BRW_GS_EDGE_INDICATOR_1                       (1 << 9)
+
+#define _3DSTATE_HS                             0x781B /* GEN7+ */
+#define _3DSTATE_TE                             0x781C /* GEN7+ */
+#define _3DSTATE_DS                             0x781D /* GEN7+ */
+
+#define _3DSTATE_CLIP                          0x7812 /* GEN6+; field macros below are grouped by command dword (DWn) */
+/* DW1 */
+# define GEN7_CLIP_WINDING_CW                           (0 << 20)
+# define GEN7_CLIP_WINDING_CCW                          (1 << 20)
+# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_8          (0 << 19)
+# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_4          (1 << 19)
+# define GEN7_CLIP_EARLY_CULL                           (1 << 18)
+# define GEN7_CLIP_CULLMODE_BOTH                        (0 << 16)
+# define GEN7_CLIP_CULLMODE_NONE                        (1 << 16)
+# define GEN7_CLIP_CULLMODE_FRONT                       (2 << 16)
+# define GEN7_CLIP_CULLMODE_BACK                        (3 << 16)
+# define GEN6_CLIP_STATISTICS_ENABLE                   (1 << 10)
+/**
+ * Just does cheap culling based on the clip distance.  Bits must be
+ * disjoint with USER_CLIP_CLIP_DISTANCE bits.
+ */
+# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT           0
+/* DW2 */
+# define GEN6_CLIP_ENABLE                              (1u << 31) /* unsigned: 1 << 31 overflows a 32-bit signed int */
+# define GEN6_CLIP_API_OGL                             (0 << 30)
+# define GEN6_CLIP_API_D3D                             (1 << 30)
+# define GEN6_CLIP_XY_TEST                             (1 << 28)
+# define GEN6_CLIP_Z_TEST                              (1 << 27)
+# define GEN6_CLIP_GB_TEST                             (1 << 26)
+/** 8-bit field of which user clip distances to clip against. */
+# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT           16
+# define GEN6_CLIP_MODE_NORMAL                         (0 << 13)
+# define GEN6_CLIP_MODE_REJECT_ALL                     (3 << 13)
+# define GEN6_CLIP_MODE_ACCEPT_ALL                     (4 << 13)
+# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE          (1 << 9)
+# define GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE  (1 << 8)
+# define GEN6_CLIP_TRI_PROVOKE_SHIFT                   4
+# define GEN6_CLIP_LINE_PROVOKE_SHIFT                  2
+# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT                        0
+/* DW3 */
+# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT               17
+# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT               6
+# define GEN6_CLIP_FORCE_ZERO_RTAINDEX                 (1 << 5)
+
+#define _3DSTATE_SF                            0x7813 /* GEN6+ */
+/* DW1 (for gen6) */
+# define GEN6_SF_NUM_OUTPUTS_SHIFT                     22
+# define GEN6_SF_SWIZZLE_ENABLE                                (1 << 21)
+# define GEN6_SF_POINT_SPRITE_UPPERLEFT                        (0 << 20)
+# define GEN6_SF_POINT_SPRITE_LOWERLEFT                        (1 << 20)
+# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT           11
+# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT           4
+/* DW2 */
+# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS              (1 << 11)
+# define GEN6_SF_STATISTICS_ENABLE                     (1 << 10)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID             (1 << 9)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME         (1 << 8)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT             (1 << 7)
+# define GEN6_SF_FRONT_SOLID                           (0 << 5)
+# define GEN6_SF_FRONT_WIREFRAME                       (1 << 5)
+# define GEN6_SF_FRONT_POINT                           (2 << 5)
+# define GEN6_SF_BACK_SOLID                            (0 << 3)
+# define GEN6_SF_BACK_WIREFRAME                                (1 << 3)
+# define GEN6_SF_BACK_POINT                            (2 << 3)
+# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE             (1 << 1)
+# define GEN6_SF_WINDING_CCW                           (1 << 0)
+/* DW3 (bit 31 uses an unsigned literal: 1 << 31 overflows a 32-bit signed int) */
+# define GEN6_SF_LINE_AA_ENABLE                                (1u << 31)
+# define GEN6_SF_CULL_BOTH                             (0 << 29)
+# define GEN6_SF_CULL_NONE                             (1 << 29)
+# define GEN6_SF_CULL_FRONT                            (2 << 29)
+# define GEN6_SF_CULL_BACK                             (3 << 29)
+# define GEN6_SF_LINE_WIDTH_SHIFT                      18 /* U3.7 */
+# define GEN6_SF_LINE_END_CAP_WIDTH_0_5                        (0 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_1_0                        (1 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_2_0                        (2 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_4_0                        (3 << 16)
+# define GEN6_SF_SCISSOR_ENABLE                                (1 << 11)
+# define GEN6_SF_MSRAST_OFF_PIXEL                      (0 << 8)
+# define GEN6_SF_MSRAST_OFF_PATTERN                    (1 << 8)
+# define GEN6_SF_MSRAST_ON_PIXEL                       (2 << 8)
+# define GEN6_SF_MSRAST_ON_PATTERN                     (3 << 8)
+/* DW4 */
+# define GEN6_SF_TRI_PROVOKE_SHIFT                     29
+# define GEN6_SF_LINE_PROVOKE_SHIFT                    27
+# define GEN6_SF_TRIFAN_PROVOKE_SHIFT                  25
+# define GEN6_SF_LINE_AA_MODE_MANHATTAN                        (0 << 14)
+# define GEN6_SF_LINE_AA_MODE_TRUE                     (1 << 14)
+# define GEN6_SF_VERTEX_SUBPIXEL_8BITS                 (0 << 12)
+# define GEN6_SF_VERTEX_SUBPIXEL_4BITS                 (1 << 12)
+# define GEN6_SF_USE_STATE_POINT_WIDTH                 (1 << 11)
+# define GEN6_SF_POINT_WIDTH_SHIFT                     0 /* U8.3 */
+/* DW5: depth offset constant */
+/* DW6: depth offset scale */
+/* DW7: depth offset clamp */
+/* DW8 */
+# define ATTRIBUTE_1_OVERRIDE_W                                (1u << 31) /* unsigned: 1 << 31 overflows a signed int */
+# define ATTRIBUTE_1_OVERRIDE_Z                                (1 << 30)
+# define ATTRIBUTE_1_OVERRIDE_Y                                (1 << 29)
+# define ATTRIBUTE_1_OVERRIDE_X                                (1 << 28)
+# define ATTRIBUTE_1_CONST_SOURCE_SHIFT                        25
+# define ATTRIBUTE_1_SWIZZLE_SHIFT                     22
+# define ATTRIBUTE_1_SOURCE_SHIFT                      16
+# define ATTRIBUTE_0_OVERRIDE_W                                (1 << 15)
+# define ATTRIBUTE_0_OVERRIDE_Z                                (1 << 14)
+# define ATTRIBUTE_0_OVERRIDE_Y                                (1 << 13)
+# define ATTRIBUTE_0_OVERRIDE_X                                (1 << 12)
+# define ATTRIBUTE_0_CONST_SOURCE_SHIFT                        9
+# define ATTRIBUTE_0_SWIZZLE_SHIFT                     6
+# define ATTRIBUTE_0_SOURCE_SHIFT                      0
+
+# define ATTRIBUTE_SWIZZLE_INPUTATTR                    0
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING             1
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_W                  2
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W           3
+# define ATTRIBUTE_SWIZZLE_SHIFT                        6
+
+/* DW16: Point sprite texture coordinate enables */
+/* DW17: Constant interpolation enables */
+/* DW18: attr 0-7 wrap shortest enables */
+/* DW19: attr 8-15 wrap shortest enables */
+
+/* On GEN7, many fields of 3DSTATE_SF were split out into a new command:
+ * 3DSTATE_SBE.  The remaining fields live in different DWords, but retain
+ * the same bit-offset.  The only new field:
+ */
+/* GEN7/DW1: */
+# define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT     12
+
+#define _3DSTATE_SBE                           0x781F /* GEN7+ */
+/* DW1 */
+# define GEN7_SBE_SWIZZLE_CONTROL_MODE                 (1 << 28)
+# define GEN7_SBE_NUM_OUTPUTS_SHIFT                    22
+# define GEN7_SBE_SWIZZLE_ENABLE                       (1 << 21)
+# define GEN7_SBE_POINT_SPRITE_LOWERLEFT               (1 << 20)
+# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT          11
+# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT          4
+/* DW2-9: Attribute setup (same as DW8-15 of gen6 _3DSTATE_SF) */
+/* DW10: Point sprite texture coordinate enables */
+/* DW11: Constant interpolation enables */
+/* DW12: attr 0-7 wrap shortest enables */
+/* DW13: attr 8-15 wrap shortest enables */
+
+/**
+ * Barycentric interpolation modes.  The enumerators are listed in the same
+ * order as the GEN6_WM_*_BARYCENTRIC enable bits of 3DSTATE_WM DW6, so mode N
+ * lines up with bit (GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT + N).
+ */
+enum brw_wm_barycentric_interp_mode {
+   BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC = 0,     /* 0 */
+   BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC,      /* 1 */
+   BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC,        /* 2 */
+   BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC,      /* 3 */
+   BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC,   /* 4 */
+   BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC,     /* 5 */
+   BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT          /* 6: number of modes above */
+};
+
+#define _3DSTATE_WM                            0x7814 /* GEN6+ */
+/* DW1: kernel pointer */
+/* DW2 */
+# define GEN6_WM_SPF_MODE                              (1u << 31) /* unsigned: 1 << 31 overflows a signed int */
+# define GEN6_WM_VECTOR_MASK_ENABLE                    (1 << 30)
+# define GEN6_WM_SAMPLER_COUNT_SHIFT                   27
+# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT       18
+# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754          (0 << 16)
+# define GEN6_WM_FLOATING_POINT_MODE_ALT               (1 << 16)
+/* DW3: scratch space */
+/* DW4 */
+# define GEN6_WM_STATISTICS_ENABLE                     (1u << 31) /* unsigned: 1 << 31 overflows a signed int */
+# define GEN6_WM_DEPTH_CLEAR                           (1 << 30)
+# define GEN6_WM_DEPTH_RESOLVE                         (1 << 28)
+# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE            (1 << 27)
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0            16
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1            8
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2            0
+/* DW5 */
+# define GEN6_WM_MAX_THREADS_SHIFT                     25
+# define GEN6_WM_KILL_ENABLE                           (1 << 22)
+# define GEN6_WM_COMPUTED_DEPTH                                (1 << 21)
+# define GEN6_WM_USES_SOURCE_DEPTH                     (1 << 20)
+# define GEN6_WM_DISPATCH_ENABLE                       (1 << 19)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5             (0 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0             (1 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0             (2 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0             (3 << 16)
+# define GEN6_WM_LINE_AA_WIDTH_0_5                     (0 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_1_0                     (1 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_2_0                     (2 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_4_0                     (3 << 14)
+# define GEN6_WM_POLYGON_STIPPLE_ENABLE                        (1 << 13)
+# define GEN6_WM_LINE_STIPPLE_ENABLE                   (1 << 11)
+# define GEN6_WM_OMASK_TO_RENDER_TARGET                        (1 << 9)
+# define GEN6_WM_USES_SOURCE_W                         (1 << 8)
+# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE              (1 << 7)
+# define GEN6_WM_32_DISPATCH_ENABLE                    (1 << 2)
+# define GEN6_WM_16_DISPATCH_ENABLE                    (1 << 1)
+# define GEN6_WM_8_DISPATCH_ENABLE                     (1 << 0)
+/* DW6 */
+# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT                  20
+# define GEN6_WM_POSOFFSET_NONE                                (0 << 18)
+# define GEN6_WM_POSOFFSET_CENTROID                    (2 << 18)
+# define GEN6_WM_POSOFFSET_SAMPLE                      (3 << 18)
+# define GEN6_WM_POSITION_ZW_PIXEL                     (0 << 16)
+# define GEN6_WM_POSITION_ZW_CENTROID                  (2 << 16)
+# define GEN6_WM_POSITION_ZW_SAMPLE                    (3 << 16)
+# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC     (1 << 15)
+# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC   (1 << 14)
+# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC      (1 << 13)
+# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC                (1 << 12)
+# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC      (1 << 11)
+# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC         (1 << 10)
+# define GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT   10
+# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT            (1 << 9)
+# define GEN6_WM_MSRAST_OFF_PIXEL                      (0 << 1)
+# define GEN6_WM_MSRAST_OFF_PATTERN                    (1 << 1)
+# define GEN6_WM_MSRAST_ON_PIXEL                       (2 << 1)
+# define GEN6_WM_MSRAST_ON_PATTERN                     (3 << 1)
+# define GEN6_WM_MSDISPMODE_PERPIXEL                   (1 << 0)
+/* DW7: kernel 1 pointer */
+/* DW8: kernel 2 pointer */
+
+#define _3DSTATE_CONSTANT_VS                 0x7815 /* GEN6+ */
+#define _3DSTATE_CONSTANT_GS                 0x7816 /* GEN6+ */
+#define _3DSTATE_CONSTANT_PS                 0x7817 /* GEN6+ */
+# define GEN6_CONSTANT_BUFFER_3_ENABLE                 (1 << 15)
+# define GEN6_CONSTANT_BUFFER_2_ENABLE                 (1 << 14)
+# define GEN6_CONSTANT_BUFFER_1_ENABLE                 (1 << 13)
+# define GEN6_CONSTANT_BUFFER_0_ENABLE                 (1 << 12)
+
+#define _3DSTATE_CONSTANT_HS                  0x7819 /* GEN7+ */
+#define _3DSTATE_CONSTANT_DS                  0x781A /* GEN7+ */
+
+#define _3DSTATE_STREAMOUT                    0x781e /* GEN7+ */
+/* DW1 */
+# define SO_FUNCTION_ENABLE                            (1u << 31) /* unsigned: 1 << 31 overflows a signed int */
+# define SO_RENDERING_DISABLE                          (1 << 30)
+/* This selects which incoming rendering stream goes down the pipeline.  The
+ * rendering stream is 0 if not defined by special cases in the GS state.
+ */
+# define SO_RENDER_STREAM_SELECT_SHIFT                 27
+# define SO_RENDER_STREAM_SELECT_MASK                  INTEL_MASK(28, 27)
+/* Controls reordering of TRISTRIP_* elements in stream output (not rendering).
+ */
+# define SO_REORDER_TRAILING                           (1 << 26)
+/* Controls SO_NUM_PRIMS_WRITTEN_* and SO_PRIM_STORAGE_* */
+# define SO_STATISTICS_ENABLE                          (1 << 25)
+# define SO_BUFFER_ENABLE(n)                           (1 << (8 + (n)))
+/* DW2 */
+# define SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT          29
+# define SO_STREAM_3_VERTEX_READ_OFFSET_MASK           INTEL_MASK(29, 29)
+# define SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT          24
+# define SO_STREAM_3_VERTEX_READ_LENGTH_MASK           INTEL_MASK(28, 24)
+# define SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT          21
+# define SO_STREAM_2_VERTEX_READ_OFFSET_MASK           INTEL_MASK(21, 21)
+# define SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT          16
+# define SO_STREAM_2_VERTEX_READ_LENGTH_MASK           INTEL_MASK(20, 16)
+# define SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT          13
+# define SO_STREAM_1_VERTEX_READ_OFFSET_MASK           INTEL_MASK(13, 13)
+# define SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT          8
+# define SO_STREAM_1_VERTEX_READ_LENGTH_MASK           INTEL_MASK(12, 8)
+# define SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT          5
+# define SO_STREAM_0_VERTEX_READ_OFFSET_MASK           INTEL_MASK(5, 5)
+# define SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT          0
+# define SO_STREAM_0_VERTEX_READ_LENGTH_MASK           INTEL_MASK(4, 0)
+
+/* 3DSTATE_WM for Gen7 */
+/* DW1 */
+# define GEN7_WM_STATISTICS_ENABLE                     (1u << 31) /* unsigned: 1 << 31 overflows a signed int */
+# define GEN7_WM_DEPTH_CLEAR                           (1 << 30)
+# define GEN7_WM_DISPATCH_ENABLE                       (1 << 29)
+# define GEN7_WM_DEPTH_RESOLVE                         (1 << 28)
+# define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE            (1 << 27)
+# define GEN7_WM_KILL_ENABLE                           (1 << 25)
+# define GEN7_WM_PSCDEPTH_OFF                          (0 << 23)
+# define GEN7_WM_PSCDEPTH_ON                           (1 << 23)
+# define GEN7_WM_PSCDEPTH_ON_GE                                (2 << 23)
+# define GEN7_WM_PSCDEPTH_ON_LE                                (3 << 23)
+# define GEN7_WM_USES_SOURCE_DEPTH                     (1 << 20)
+# define GEN7_WM_USES_SOURCE_W                         (1 << 19)
+# define GEN7_WM_POSITION_ZW_PIXEL                     (0 << 17)
+# define GEN7_WM_POSITION_ZW_CENTROID                  (2 << 17)
+# define GEN7_WM_POSITION_ZW_SAMPLE                    (3 << 17)
+# define GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT   11
+# define GEN7_WM_USES_INPUT_COVERAGE_MASK              (1 << 10)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5             (0 << 8)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0             (1 << 8)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_2_0             (2 << 8)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_4_0             (3 << 8)
+# define GEN7_WM_LINE_AA_WIDTH_0_5                     (0 << 6)
+# define GEN7_WM_LINE_AA_WIDTH_1_0                     (1 << 6)
+# define GEN7_WM_LINE_AA_WIDTH_2_0                     (2 << 6)
+# define GEN7_WM_LINE_AA_WIDTH_4_0                     (3 << 6)
+# define GEN7_WM_POLYGON_STIPPLE_ENABLE                        (1 << 4)
+# define GEN7_WM_LINE_STIPPLE_ENABLE                   (1 << 3)
+# define GEN7_WM_POINT_RASTRULE_UPPER_RIGHT            (1 << 2)
+# define GEN7_WM_MSRAST_OFF_PIXEL                      (0 << 0)
+# define GEN7_WM_MSRAST_OFF_PATTERN                    (1 << 0)
+# define GEN7_WM_MSRAST_ON_PIXEL                       (2 << 0)
+# define GEN7_WM_MSRAST_ON_PATTERN                     (3 << 0)
+/* DW2 */
+# define GEN7_WM_MSDISPMODE_PERPIXEL                   (1u << 31) /* unsigned: 1 << 31 overflows a signed int */
+
+#define _3DSTATE_PS                            0x7820 /* GEN7+ */
+/* DW1: kernel pointer */
+/* DW2 */
+# define GEN7_PS_SPF_MODE                              (1u << 31) /* unsigned: 1 << 31 overflows a signed int */
+# define GEN7_PS_VECTOR_MASK_ENABLE                    (1 << 30)
+# define GEN7_PS_SAMPLER_COUNT_SHIFT                   27
+# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT       18
+# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754          (0 << 16)
+# define GEN7_PS_FLOATING_POINT_MODE_ALT               (1 << 16)
+/* DW3: scratch space */
+/* DW4 */
+# define GEN7_PS_MAX_THREADS_SHIFT                     24
+# define GEN7_PS_PUSH_CONSTANT_ENABLE                  (1 << 11)
+# define GEN7_PS_ATTRIBUTE_ENABLE                      (1 << 10)
+# define GEN7_PS_OMASK_TO_RENDER_TARGET                        (1 << 9)
+# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE              (1 << 7)
+# define GEN7_PS_POSOFFSET_NONE                                (0 << 3)
+# define GEN7_PS_POSOFFSET_CENTROID                    (2 << 3)
+# define GEN7_PS_POSOFFSET_SAMPLE                      (3 << 3)
+# define GEN7_PS_32_DISPATCH_ENABLE                    (1 << 2)
+# define GEN7_PS_16_DISPATCH_ENABLE                    (1 << 1)
+# define GEN7_PS_8_DISPATCH_ENABLE                     (1 << 0)
+/* DW5 */
+# define GEN7_PS_DISPATCH_START_GRF_SHIFT_0            16
+# define GEN7_PS_DISPATCH_START_GRF_SHIFT_1            8
+# define GEN7_PS_DISPATCH_START_GRF_SHIFT_2            0
+/* DW6: kernel 1 pointer */
+/* DW7: kernel 2 pointer */
+
+#define _3DSTATE_SAMPLE_MASK                   0x7818 /* GEN6+ */
+
+#define _3DSTATE_DRAWING_RECTANGLE             0x7900
+#define _3DSTATE_BLEND_CONSTANT_COLOR          0x7901
+#define _3DSTATE_CHROMA_KEY                    0x7904
+#define _3DSTATE_DEPTH_BUFFER                  0x7905 /* GEN4-6 */
+#define _3DSTATE_POLY_STIPPLE_OFFSET           0x7906
+#define _3DSTATE_POLY_STIPPLE_PATTERN          0x7907
+#define _3DSTATE_LINE_STIPPLE_PATTERN          0x7908
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP     0x7909
+#define _3DSTATE_AA_LINE_PARAMETERS            0x790a /* G45+ */
+
+#define _3DSTATE_GS_SVB_INDEX                  0x790b /* CTG+ */
+/* DW1 */
+# define SVB_INDEX_SHIFT                               29
+# define SVB_LOAD_INTERNAL_VERTEX_COUNT                        (1 << 0) /* SNB+ */
+/* DW2: SVB index */
+/* DW3: SVB maximum index */
+
+#define _3DSTATE_MULTISAMPLE                   0x790d /* GEN6+ */
+/* DW1 */
+# define MS_PIXEL_LOCATION_CENTER                      (0 << 4)
+# define MS_PIXEL_LOCATION_UPPER_LEFT                  (1 << 4)
+# define MS_NUMSAMPLES_1                               (0 << 1)
+# define MS_NUMSAMPLES_4                               (2 << 1)
+# define MS_NUMSAMPLES_8                               (3 << 1)
+
+#define _3DSTATE_STENCIL_BUFFER                        0x790e /* ILK, SNB */
+#define _3DSTATE_HIER_DEPTH_BUFFER             0x790f /* ILK, SNB */
+
+#define GEN7_3DSTATE_CLEAR_PARAMS              0x7804
+#define GEN7_3DSTATE_DEPTH_BUFFER              0x7805
+#define GEN7_3DSTATE_STENCIL_BUFFER            0x7806
+#define GEN7_3DSTATE_HIER_DEPTH_BUFFER         0x7807
+
+#define _3DSTATE_CLEAR_PARAMS                  0x7910 /* ILK, SNB */
+# define DEPTH_CLEAR_VALID                             (1 << 15)
+/* DW1: depth clear value */
+
+#define _3DSTATE_SO_DECL_LIST                  0x7917 /* GEN7+ */
+/* DW1 */
+# define SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT           12
+# define SO_STREAM_TO_BUFFER_SELECTS_3_MASK            INTEL_MASK(15, 12)
+# define SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT           8
+# define SO_STREAM_TO_BUFFER_SELECTS_2_MASK            INTEL_MASK(11, 8)
+# define SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT           4
+# define SO_STREAM_TO_BUFFER_SELECTS_1_MASK            INTEL_MASK(7, 4)
+# define SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT           0
+# define SO_STREAM_TO_BUFFER_SELECTS_0_MASK            INTEL_MASK(3, 0)
+/* DW2 */
+# define SO_NUM_ENTRIES_3_SHIFT                                24
+# define SO_NUM_ENTRIES_3_MASK                         INTEL_MASK(31, 24)
+# define SO_NUM_ENTRIES_2_SHIFT                                16
+# define SO_NUM_ENTRIES_2_MASK                         INTEL_MASK(23, 16)
+# define SO_NUM_ENTRIES_1_SHIFT                                8
+# define SO_NUM_ENTRIES_1_MASK                         INTEL_MASK(15, 8)
+# define SO_NUM_ENTRIES_0_SHIFT                                0
+# define SO_NUM_ENTRIES_0_MASK                         INTEL_MASK(7, 0)
+
+/* SO_DECL DW0 */
+# define SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT              12
+# define SO_DECL_OUTPUT_BUFFER_SLOT_MASK               INTEL_MASK(13, 12)
+# define SO_DECL_HOLE_FLAG                             (1 << 11)
+# define SO_DECL_REGISTER_INDEX_SHIFT                  4
+# define SO_DECL_REGISTER_INDEX_MASK                   INTEL_MASK(9, 4)
+# define SO_DECL_COMPONENT_MASK_SHIFT                  0
+# define SO_DECL_COMPONENT_MASK_MASK                   INTEL_MASK(3, 0)
+
+#define _3DSTATE_SO_BUFFER                    0x7918 /* GEN7+ */
+/* DW1 */
+# define SO_BUFFER_INDEX_SHIFT                         29
+# define SO_BUFFER_INDEX_MASK                          INTEL_MASK(30, 29)
+# define SO_BUFFER_PITCH_SHIFT                         0
+# define SO_BUFFER_PITCH_MASK                          INTEL_MASK(11, 0)
+/* DW2: start address */
+/* DW3: end address. */
+
+#define CMD_PIPE_CONTROL              0x7a00
+
+#define CMD_MI_FLUSH                  0x0200
+
+
+/* Bitfields for the URB_WRITE message, DW2 of message header: */
+#define URB_WRITE_PRIM_END             0x1
+#define URB_WRITE_PRIM_START           0x2
+#define URB_WRITE_PRIM_TYPE_SHIFT      2
+
+
+/* Maximum number of entries that can be addressed using a binding table
+ * pointer of type SURFTYPE_BUFFER
+ */
+#define BRW_MAX_NUM_BUFFER_ENTRIES     (1 << 27)
+
+#include "brw_chipset.h"
+
+#endif
index eecafe2..95fc997 100644 (file)
 #include <stdarg.h>
 
 //#include "main/mtypes.h"
-
 //#include "brw_context.h"
 #include "brw_defines.h"
+#include "brw_structs.h"
+
+#include <stdint.h>
 
 struct {
     char    *name;
@@ -633,7 +635,7 @@ static int src_da1 (FILE *file, uint32_t type, uint32_t _reg_file,
 static int src_ia1 (FILE *file,
                     uint32_t type,
                     uint32_t _reg_file,
-                    GLint _addr_imm,
+                    int _addr_imm,
                     uint32_t _addr_subreg_nr,
                     uint32_t _negate,
                     uint32_t __abs,
index 006d5e5..50031fc 100644 (file)
   */
   
 
-#include "brw_context.h"
+// #include "brw_context.h"
 #include "brw_defines.h"
 #include "brw_eu.h"
 
-#include "glsl/ralloc.h"
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+
+// #include "glsl/ralloc.h"
 
 /* Returns the corresponding conditional mod for swapping src0 and
  * src1 in e.g. CMP.
@@ -56,7 +60,7 @@ brw_swap_cmod(uint32_t cmod)
 /* How does predicate control work when execution_size != 8?  Do I
  * need to test/set for 0xffff when execution_size is 16?
  */
-void brw_set_predicate_control_flag_value( struct brw_compile *p, uint32_t value )
+void brw_set_predicate_control_flag_value(struct brw_compile *p, uint32_t value)
 {
    p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
@@ -72,7 +76,7 @@ void brw_set_predicate_control_flag_value( struct brw_compile *p, uint32_t value
    }   
 }
 
-void brw_set_predicate_control( struct brw_compile *p, uint32_t pc )
+void brw_set_predicate_control(struct brw_compile *p, uint32_t pc) /* store pc in header.predicate_control of p->current */
 {
    p->current->header.predicate_control = pc;
 }
@@ -82,12 +86,12 @@ void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse)
    p->current->header.predicate_inverse = predicate_inverse;
 }
 
-void brw_set_conditionalmod( struct brw_compile *p, uint32_t conditional )
+void brw_set_conditionalmod(struct brw_compile *p, uint32_t conditional) /* store conditional in header.destreg__conditionalmod of p->current */
 {
    p->current->header.destreg__conditionalmod = conditional;
 }
 
-void brw_set_access_mode( struct brw_compile *p, uint32_t access_mode )
+void brw_set_access_mode(struct brw_compile *p, uint32_t access_mode) /* store access_mode in header.access_mode of p->current */
 {
    p->current->header.access_mode = access_mode;
 }
@@ -98,7 +102,7 @@ brw_set_compression_control(struct brw_compile *p,
 {
    p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED);
 
-   if (p->brw->intel.gen >= 6) {
+   if (p->gen >= 6) {
       /* Since we don't use the 32-wide support in gen6, we translate
        * the pre-gen6 compression control here.
        */
@@ -129,23 +133,25 @@ brw_set_compression_control(struct brw_compile *p,
    }
 }
 
-void brw_set_mask_control( struct brw_compile *p, uint32_t value )
+void brw_set_mask_control(struct brw_compile *p, uint32_t value) /* store value in header.mask_control of p->current */
 {
    p->current->header.mask_control = value;
 }
 
-void brw_set_saturate( struct brw_compile *p, uint32_t value )
+void brw_set_saturate(struct brw_compile *p, uint32_t value) /* store value in header.saturate of p->current */
 {
    p->current->header.saturate = value;
 }
 
+/* Store value in header.acc_wr_control of p->current.  Only done when
+ * p->gen >= 6; on earlier generations the call leaves the state untouched.
+ * Uses p->gen (like brw_set_compression_control) instead of the former
+ * p->brw->intel.gen lookup, so the function no longer needs brw_context
+ * and does not have to be compiled out.
+ */
 void brw_set_acc_write_control(struct brw_compile *p, uint32_t value)
 {
-   if (p->brw->intel.gen >= 6)
+   if (p->gen >= 6)
       p->current->header.acc_wr_control = value;
 }
 
-void brw_push_insn_state( struct brw_compile *p )
+void brw_push_insn_state(struct brw_compile *p)
 {
    assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
    memcpy(p->current+1, p->current, sizeof(struct brw_instruction));
@@ -153,7 +159,7 @@ void brw_push_insn_state( struct brw_compile *p )
    p->current++;   
 }
 
-void brw_pop_insn_state( struct brw_compile *p )
+void brw_pop_insn_state(struct brw_compile *p)
 {
    assert(p->current != p->stack);
    p->current--;
@@ -161,6 +167,7 @@ void brw_pop_insn_state( struct brw_compile *p )
 }
 
 
+#if 0
 /***********************************************************************
  */
 void
@@ -200,8 +207,8 @@ brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx)
 }
 
 
-const uint32_t *brw_get_program( struct brw_compile *p,
-                               uint32_t *sz )
+const uint32_t *brw_get_program(struct brw_compile *p,
+                               uint32_t *sz)
 {
    uint32_t i;
 
@@ -335,3 +342,4 @@ brw_resolve_cals(struct brw_compile *c)
         c->first_label = NULL;
     }
 }
+#endif
index e3d8a1b..1553ce9 100644 (file)
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-   
-
 #ifndef BRW_EU_H
 #define BRW_EU_H
 
 #include <stdbool.h>
+#include <assert.h>
 #include "brw_structs.h"
 #include "brw_defines.h"
-#include "program/prog_instruction.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
 
 #define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
 #define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
 #define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
 #define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
 
+/* Channel write masks, one bit per channel: X = bit 0, Y = bit 1, Z = bit 2,
+ * W = bit 3.  Every multi-channel name is the OR of the channels it spells.
+ */
+#define WRITEMASK_X     0x1
+#define WRITEMASK_Y     0x2
+#define WRITEMASK_Z     0x4
+#define WRITEMASK_W     0x8
+#define WRITEMASK_XY    (WRITEMASK_X | WRITEMASK_Y)
+#define WRITEMASK_XZ    (WRITEMASK_X | WRITEMASK_Z)
+#define WRITEMASK_YZ    (WRITEMASK_Y | WRITEMASK_Z)
+#define WRITEMASK_XYZ   (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z)
+#define WRITEMASK_XW    (WRITEMASK_X | WRITEMASK_W)
+#define WRITEMASK_YW    (WRITEMASK_Y | WRITEMASK_W)
+#define WRITEMASK_XYW   (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_W)
+#define WRITEMASK_ZW    (WRITEMASK_Z | WRITEMASK_W)
+#define WRITEMASK_XZW   (WRITEMASK_X | WRITEMASK_Z | WRITEMASK_W)
+#define WRITEMASK_YZW   (WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W)
+#define WRITEMASK_XYZW  (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W)
+
 static inline bool brw_is_single_value_swizzle(int swiz) /* true iff swiz is one of the XXXX, YYYY, ZZZZ or WWWW swizzles */
 {
    return (swiz == BRW_SWIZZLE_XXXX ||
-          swiz == BRW_SWIZZLE_YYYY ||
-          swiz == BRW_SWIZZLE_ZZZZ ||
-          swiz == BRW_SWIZZLE_WWWW);
+           swiz == BRW_SWIZZLE_YYYY ||
+           swiz == BRW_SWIZZLE_ZZZZ ||
+           swiz == BRW_SWIZZLE_WWWW);
 }
 
 #define REG_SIZE (8*4)
 
-
 /* These aren't hardware structs, just something useful for us to pass around:
  *
  * Align1 operation has a lot of control over input ranges.  Used in
@@ -61,109 +78,70 @@ static inline bool brw_is_single_value_swizzle(int swiz)
  */
 struct brw_reg
 {
-   GLuint type:4;
-   GLuint file:2;
-   GLuint nr:8;
-   GLuint subnr:5;             /* :1 in align16 */
-   GLuint negate:1;            /* source only */
-   GLuint abs:1;               /* source only */
-   GLuint vstride:4;           /* source only */
-   GLuint width:3;             /* src only, align1 only */
-   GLuint hstride:2;                   /* align1 only */
-   GLuint address_mode:1;      /* relative addressing, hopefully! */
-   GLuint pad0:1;
-
-   union {      
+   uint32_t type:4;                /* one of BRW_REGISTER_TYPE_* */
+   uint32_t file:2;                /* register file, e.g. BRW_GENERAL_REGISTER_FILE */
+   uint32_t nr:8;                /* presumably the register number within the file -- confirm */
+   uint32_t subnr:5;                /* :1 in align16 */
+   uint32_t negate:1;                /* source only */
+   uint32_t abs:1;                /* source only */
+   uint32_t vstride:4;                /* source only */
+   uint32_t width:3;                /* src only, align1 only */
+   uint32_t hstride:2;                   /* align1 only */
+   uint32_t address_mode:1;        /* relative addressing, hopefully! */
+   uint32_t pad0:1;                /* brings the bit-fields above to 32 bits in total */
+
+   union {                        /* second dword: either align16/indirect bits or an immediate value */
       struct {
-        GLuint swizzle:8;              /* src only, align16 only */
-        GLuint writemask:4;            /* dest only, align16 only */
-        GLint  indirect_offset:10;     /* relative addressing offset */
-        GLuint pad1:10;                /* two dwords total */
+         uint32_t swizzle:8;                /* src only, align16 only */
+         uint32_t writemask:4;                /* dest only, align16 only */
+         int  indirect_offset:10;        /* relative addressing offset; NOTE(review): signedness of a plain-int bit-field is implementation-defined in C -- confirm it is signed here */
+         uint32_t pad1:10;                /* two dwords total */
       } bits;
 
-      GLfloat f;
-      GLint   d;
-      GLuint ud;
-   } dw1;      
+      float f;                        /* dw1 viewed as a float */
+      int   d;                        /* dw1 viewed as a signed integer */
+      uint32_t ud;                /* dw1 viewed as an unsigned integer */
+   } dw1;
 };
 
 
 struct brw_indirect {
-   GLuint addr_subnr:4;
-   GLint addr_offset:10;
-   GLuint pad:18;
+   uint32_t addr_subnr:4;        /* presumably the address sub-register number -- confirm */
+   int addr_offset:10;                /* offset; NOTE(review): plain-int bit-field signedness is implementation-defined in C */
+   uint32_t pad:18;                /* pads the struct's bit-fields to 32 bits (4 + 10 + 18) */
 };
 
-
-struct brw_glsl_label;
-struct brw_glsl_call;
-
-
-
 #define BRW_EU_MAX_INSN_STACK 5
-
+#define BRW_MAX_INSTRUCTION_NUM 8192
 struct brw_compile {
-   struct brw_instruction *store;
-   int store_size;
-   GLuint nr_insn;
-
-   void *mem_ctx;
-
-   /* Allow clients to push/pop instruction state:
-    */
-   struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
-   bool compressed_stack[BRW_EU_MAX_INSN_STACK];
-   struct brw_instruction *current;
-
-   GLuint flag_value;
-   bool single_program_flow;
-   bool compressed;
-   struct brw_context *brw;
-
-   /* Control flow stacks:
-    * - if_stack contains IF and ELSE instructions which must be patched
-    *   (and popped) once the matching ENDIF instruction is encountered.
-    *
-    *   Just store the instruction pointer(an index).
-    */
-   int *if_stack;
-   int if_stack_depth;
-   int if_stack_array_size;
-
-   /**
-    * loop_stack contains the instruction pointers of the starts of loops which
-    * must be patched (and popped) once the matching WHILE instruction is
-    * encountered.
-    */
-   int *loop_stack;
-   /**
-    * pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF
-    * blocks they were popping out of, to fix up the mask stack.  This tracks
-    * the IF/ENDIF nesting in each current nested loop level.
-    */
-   int *if_depth_in_loop;
-   int loop_stack_depth;
-   int loop_stack_array_size;
-
-   struct brw_glsl_label *first_label;  /**< linked list of labels */
-   struct brw_glsl_call *first_call;    /**< linked list of CALs */
+  int gen;
+  struct brw_instruction store[8192];
+  int store_size;
+  uint32_t nr_insn;
+
+  /* Allow clients to push/pop instruction state */
+  struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+  bool compressed_stack[BRW_EU_MAX_INSN_STACK];
+  struct brw_instruction *current;
+
+  uint32_t flag_value;
+  bool single_program_flow;
+  bool compressed;
+  struct brw_context *brw;
 };
 
-
 void
-brw_save_label(struct brw_compile *c, const char *name, GLuint position);
+brw_save_label(struct brw_compile *c, const char *name, uint32_t position); /* NOTE(review): definition not visible in this patch -- confirm it is still compiled */
 
 void
-brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos);
+brw_save_call(struct brw_compile *c, const char *name, uint32_t call_pos); /* NOTE(review): definition not visible in this patch -- confirm it is still compiled */
 
 void
 brw_resolve_cals(struct brw_compile *c); /* NOTE(review): this patch places the brw_eu.c definition inside an #if 0 region -- confirm no caller remains */
 
-
-
-static INLINE int type_sz( GLuint type )
+static inline int type_sz(uint32_t type)
 {
-   switch( type ) {
+   switch(type) {
    case BRW_REGISTER_TYPE_UD:
    case BRW_REGISTER_TYPE_D:
    case BRW_REGISTER_TYPE_F:
@@ -192,15 +170,15 @@ static INLINE int type_sz( GLuint type )
  * \param swizzle  one of BRW_SWIZZLE_x
  * \param writemask  WRITEMASK_X/Y/Z/W bitfield
  */
-static INLINE struct brw_reg brw_reg( GLuint file,
-                                      GLuint nr,
-                                      GLuint subnr,
-                                      GLuint type,
-                                      GLuint vstride,
-                                      GLuint width,
-                                      GLuint hstride,
-                                      GLuint swizzle,
-                                      GLuint writemask )
+static inline struct brw_reg brw_reg(uint32_t file,
+                                     uint32_t nr,
+                                     uint32_t subnr,
+                                     uint32_t type,
+                                     uint32_t vstride,
+                                     uint32_t width,
+                                     uint32_t hstride,
+                                     uint32_t swizzle,
+                                     uint32_t writemask)
 {
    struct brw_reg reg;
    if (file == BRW_GENERAL_REGISTER_FILE)
@@ -236,166 +214,159 @@ static INLINE struct brw_reg brw_reg( GLuint file,
 }
 
 /** Construct float[16] register */
-static INLINE struct brw_reg brw_vec16_reg( GLuint file,
-                                             GLuint nr,
-                                             GLuint subnr )
+static inline struct brw_reg brw_vec16_reg(uint32_t file,
+                                           uint32_t nr,
+                                           uint32_t subnr)
 {
    return brw_reg(file,
-                 nr,
-                 subnr,
-                 BRW_REGISTER_TYPE_F,
-                 BRW_VERTICAL_STRIDE_16,
-                 BRW_WIDTH_16,
-                 BRW_HORIZONTAL_STRIDE_1,
-                 BRW_SWIZZLE_XYZW,
-                 WRITEMASK_XYZW);
+                  nr,
+                  subnr,
+                  BRW_REGISTER_TYPE_F,
+                  BRW_VERTICAL_STRIDE_16,
+                  BRW_WIDTH_16,
+                  BRW_HORIZONTAL_STRIDE_1,
+                  BRW_SWIZZLE_XYZW,
+                  WRITEMASK_XYZW);
 }
 
 /** Construct float[8] register */
-static INLINE struct brw_reg brw_vec8_reg( GLuint file,
-                                            GLuint nr,
-                                            GLuint subnr )
+static inline struct brw_reg brw_vec8_reg(uint32_t file,
+                                          uint32_t nr,
+                                          uint32_t subnr)
 {
    return brw_reg(file,
-                 nr,
-                 subnr,
-                 BRW_REGISTER_TYPE_F,
-                 BRW_VERTICAL_STRIDE_8,
-                 BRW_WIDTH_8,
-                 BRW_HORIZONTAL_STRIDE_1,
-                 BRW_SWIZZLE_XYZW,
-                 WRITEMASK_XYZW);
+                  nr,
+                  subnr,
+                  BRW_REGISTER_TYPE_F,
+                  BRW_VERTICAL_STRIDE_8,
+                  BRW_WIDTH_8,
+                  BRW_HORIZONTAL_STRIDE_1,
+                  BRW_SWIZZLE_XYZW,
+                  WRITEMASK_XYZW);
 }
 
 /** Construct float[4] register */
-static INLINE struct brw_reg brw_vec4_reg( GLuint file,
-                                             GLuint nr,
-                                             GLuint subnr )
+static inline struct brw_reg brw_vec4_reg(uint32_t file,
+                                          uint32_t nr,
+                                          uint32_t subnr)
 {
    return brw_reg(file,
-                 nr,
-                 subnr,
-                 BRW_REGISTER_TYPE_F,
-                 BRW_VERTICAL_STRIDE_4,
-                 BRW_WIDTH_4,
-                 BRW_HORIZONTAL_STRIDE_1,
-                 BRW_SWIZZLE_XYZW,
-                 WRITEMASK_XYZW);
+                  nr,
+                  subnr,
+                  BRW_REGISTER_TYPE_F,
+                  BRW_VERTICAL_STRIDE_4,
+                  BRW_WIDTH_4,
+                  BRW_HORIZONTAL_STRIDE_1,
+                  BRW_SWIZZLE_XYZW,
+                  WRITEMASK_XYZW);
 }
 
 /** Construct float[2] register */
-static INLINE struct brw_reg brw_vec2_reg( GLuint file,
-                                             GLuint nr,
-                                             GLuint subnr )
+static inline struct brw_reg brw_vec2_reg(uint32_t file,
+                                          uint32_t nr,
+                                          uint32_t subnr)
 {
    return brw_reg(file,
-                 nr,
-                 subnr,
-                 BRW_REGISTER_TYPE_F,
-                 BRW_VERTICAL_STRIDE_2,
-                 BRW_WIDTH_2,
-                 BRW_HORIZONTAL_STRIDE_1,
-                 BRW_SWIZZLE_XYXY,
-                 WRITEMASK_XY);
+                  nr,
+                  subnr,
+                  BRW_REGISTER_TYPE_F,
+                  BRW_VERTICAL_STRIDE_2,
+                  BRW_WIDTH_2,
+                  BRW_HORIZONTAL_STRIDE_1,
+                  BRW_SWIZZLE_XYXY,
+                  WRITEMASK_XY);
 }
 
 /** Construct float[1] register */
-static INLINE struct brw_reg brw_vec1_reg( GLuint file,
-                                            GLuint nr,
-                                            GLuint subnr )
+static inline struct brw_reg brw_vec1_reg(uint32_t file,
+                                          uint32_t nr,
+                                          uint32_t subnr)
 {
    return brw_reg(file,
-                 nr,
-                 subnr,
-                 BRW_REGISTER_TYPE_F,
-                 BRW_VERTICAL_STRIDE_0,
-                 BRW_WIDTH_1,
-                 BRW_HORIZONTAL_STRIDE_0,
-                 BRW_SWIZZLE_XXXX,
-                 WRITEMASK_X);
+                  nr,
+                  subnr,
+                  BRW_REGISTER_TYPE_F,
+                  BRW_VERTICAL_STRIDE_0,
+                  BRW_WIDTH_1,
+                  BRW_HORIZONTAL_STRIDE_0,
+                  BRW_SWIZZLE_XXXX,
+                  WRITEMASK_X);
 }
 
 
-static INLINE struct brw_reg retype( struct brw_reg reg,
-                                      GLuint type )
+static inline struct brw_reg retype(struct brw_reg reg, uint32_t type)
 {
    reg.type = type;
    return reg;
 }
 
-static inline struct brw_reg
-sechalf(struct brw_reg reg)
+static inline struct brw_reg sechalf(struct brw_reg reg)
 {
    if (reg.vstride)
       reg.nr++;
    return reg;
 }
 
-static INLINE struct brw_reg suboffset( struct brw_reg reg,
-                                         GLuint delta )
-{   
+static inline struct brw_reg suboffset(struct brw_reg reg, uint32_t delta)
+{
    reg.subnr += delta * type_sz(reg.type);
    return reg;
 }
 
-
-static INLINE struct brw_reg offset( struct brw_reg reg,
-                                      GLuint delta )
+static inline struct brw_reg offset(struct brw_reg reg, uint32_t delta)
 {
    reg.nr += delta;
    return reg;
 }
 
-
-static INLINE struct brw_reg byte_offset( struct brw_reg reg,
-                                           GLuint bytes )
+static inline struct brw_reg byte_offset(struct brw_reg reg, uint32_t bytes)
 {
-   GLuint newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
+   uint32_t newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
    reg.nr = newoffset / REG_SIZE;
    reg.subnr = newoffset % REG_SIZE;
    return reg;
 }
-   
+
 
 /** Construct unsigned word[16] register */
-static INLINE struct brw_reg brw_uw16_reg( GLuint file,
-                                            GLuint nr,
-                                            GLuint subnr )
+static inline struct brw_reg brw_uw16_reg(uint32_t file,
+                                          uint32_t nr,
+                                          uint32_t subnr)
 {
    return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
 }
 
 /** Construct unsigned word[8] register */
-static INLINE struct brw_reg brw_uw8_reg( GLuint file,
-                                           GLuint nr,
-                                           GLuint subnr )
+static inline struct brw_reg brw_uw8_reg(uint32_t file,
+                                         uint32_t nr,
+                                         uint32_t subnr)
 {
    return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
 }
 
 /** Construct unsigned word[1] register */
-static INLINE struct brw_reg brw_uw1_reg( GLuint file,
-                                           GLuint nr,
-                                           GLuint subnr )
+static inline struct brw_reg brw_uw1_reg(uint32_t file,
+                                         uint32_t nr,
+                                         uint32_t subnr)
 {
    return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
 }
 
-static INLINE struct brw_reg brw_imm_reg( GLuint type )
+static inline struct brw_reg brw_imm_reg(uint32_t type)
 {
-   return brw_reg( BRW_IMMEDIATE_VALUE,
-                  0,
-                  0,
-                  type,
-                  BRW_VERTICAL_STRIDE_0,
-                  BRW_WIDTH_1,
-                  BRW_HORIZONTAL_STRIDE_0,
-                  0,
-                  0);      
+   return brw_reg(BRW_IMMEDIATE_VALUE,
+                   0,
+                   0,
+                   type,
+                   BRW_VERTICAL_STRIDE_0,
+                   BRW_WIDTH_1,
+                   BRW_HORIZONTAL_STRIDE_0,
+                   0,
+                   0);
 }
 
 /** Construct float immediate register */
-static INLINE struct brw_reg brw_imm_f( GLfloat f )
+static inline struct brw_reg brw_imm_f(float f)
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
    imm.dw1.f = f;
@@ -403,7 +374,7 @@ static INLINE struct brw_reg brw_imm_f( GLfloat f )
 }
 
 /** Construct integer immediate register */
-static INLINE struct brw_reg brw_imm_d( GLint d )
+static inline struct brw_reg brw_imm_d(int d)
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
    imm.dw1.d = d;
@@ -411,7 +382,7 @@ static INLINE struct brw_reg brw_imm_d( GLint d )
 }
 
 /** Construct uint immediate register */
-static INLINE struct brw_reg brw_imm_ud( GLuint ud )
+static inline struct brw_reg brw_imm_ud(uint32_t ud)
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
    imm.dw1.ud = ud;
@@ -419,7 +390,7 @@ static INLINE struct brw_reg brw_imm_ud( GLuint ud )
 }
 
 /** Construct ushort immediate register */
-static INLINE struct brw_reg brw_imm_uw( GLushort uw )
+static inline struct brw_reg brw_imm_uw(uint16_t uw)
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
    imm.dw1.ud = uw | (uw << 16);
@@ -427,7 +398,7 @@ static INLINE struct brw_reg brw_imm_uw( GLushort uw )
 }
 
 /** Construct short immediate register */
-static INLINE struct brw_reg brw_imm_w( GLshort w )
+static inline struct brw_reg brw_imm_w(short w)
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
    imm.dw1.d = w | (w << 16);
@@ -439,7 +410,7 @@ static INLINE struct brw_reg brw_imm_w( GLshort w )
  */
 
 /** Construct vector of eight signed half-byte values */
-static INLINE struct brw_reg brw_imm_v( GLuint v )
+static inline struct brw_reg brw_imm_v(uint32_t v)
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
    imm.vstride = BRW_VERTICAL_STRIDE_0;
@@ -450,7 +421,7 @@ static INLINE struct brw_reg brw_imm_v( GLuint v )
 }
 
 /** Construct vector of four 8-bit float values */
-static INLINE struct brw_reg brw_imm_vf( GLuint v )
+static inline struct brw_reg brw_imm_vf(uint32_t v)
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
    imm.vstride = BRW_VERTICAL_STRIDE_0;
@@ -464,148 +435,144 @@ static INLINE struct brw_reg brw_imm_vf( GLuint v )
 #define VF_ONE  0x30
 #define VF_NEG  (1<<7)
 
-static INLINE struct brw_reg brw_imm_vf4( GLuint v0, 
-                                           GLuint v1, 
-                                           GLuint v2,
-                                           GLuint v3)
+static inline struct brw_reg brw_imm_vf4(uint32_t v0,
+                                         uint32_t v1,
+                                         uint32_t v2,
+                                         uint32_t v3)
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
    imm.vstride = BRW_VERTICAL_STRIDE_0;
    imm.width = BRW_WIDTH_4;
    imm.hstride = BRW_HORIZONTAL_STRIDE_1;
    imm.dw1.ud = ((v0 << 0) |
-                (v1 << 8) |
-                (v2 << 16) |
-                (v3 << 24));
+                 (v1 << 8) |
+                 (v2 << 16) |
+                 (v3 << 24));
    return imm;
 }
 
 
-static INLINE struct brw_reg brw_address( struct brw_reg reg )
+static inline struct brw_reg brw_address(struct brw_reg reg)
 {
    return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
 }
 
 /** Construct float[1] general-purpose register */
-static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr )
+static inline struct brw_reg brw_vec1_grf(uint32_t nr, uint32_t subnr)
 {
    return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
 }
 
 /** Construct float[2] general-purpose register */
-static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr )
+static inline struct brw_reg brw_vec2_grf(uint32_t nr, uint32_t subnr)
 {
    return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
 }
 
 /** Construct float[4] general-purpose register */
-static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr )
+static inline struct brw_reg brw_vec4_grf(uint32_t nr, uint32_t subnr)
 {
    return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
 }
 
 /** Construct float[8] general-purpose register */
-static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr )
+static inline struct brw_reg brw_vec8_grf(uint32_t nr, uint32_t subnr)
 {
    return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
 }
 
 
-static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr )
+static inline struct brw_reg brw_uw8_grf(uint32_t nr, uint32_t subnr)
 {
    return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
 }
 
-static INLINE struct brw_reg brw_uw16_grf( GLuint nr, GLuint subnr )
+static inline struct brw_reg brw_uw16_grf(uint32_t nr, uint32_t subnr)
 {
    return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
 }
 
-
 /** Construct null register (usually used for setting condition codes) */
-static INLINE struct brw_reg brw_null_reg( void )
+static inline struct brw_reg brw_null_reg(void)
 {
-   return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, 
-                      BRW_ARF_NULL, 
-                      0);
+   return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                       BRW_ARF_NULL,
+                       0);
 }
 
-static INLINE struct brw_reg brw_address_reg( GLuint subnr )
+static inline struct brw_reg brw_address_reg(uint32_t subnr)
 {
-   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, 
-                     BRW_ARF_ADDRESS, 
-                     subnr);
+   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                      BRW_ARF_ADDRESS,
+                      subnr);
 }
 
 /* If/else instructions break in align16 mode if writemask & swizzle
  * aren't xyzw.  This goes against the convention for other scalar
  * regs:
  */
-static INLINE struct brw_reg brw_ip_reg( void )
+static inline struct brw_reg brw_ip_reg(void)
 {
-   return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, 
-                 BRW_ARF_IP, 
-                 0,
-                 BRW_REGISTER_TYPE_UD,
-                 BRW_VERTICAL_STRIDE_4, /* ? */
-                 BRW_WIDTH_1,
-                 BRW_HORIZONTAL_STRIDE_0,
-                 BRW_SWIZZLE_XYZW, /* NOTE! */
-                 WRITEMASK_XYZW); /* NOTE! */
+   return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                  BRW_ARF_IP,
+                  0,
+                  BRW_REGISTER_TYPE_UD,
+                  BRW_VERTICAL_STRIDE_4, /* ? */
+                  BRW_WIDTH_1,
+                  BRW_HORIZONTAL_STRIDE_0,
+                  BRW_SWIZZLE_XYZW, /* NOTE! */
+                  WRITEMASK_XYZW); /* NOTE! */
 }
 
-static INLINE struct brw_reg brw_acc_reg( void )
+static inline struct brw_reg brw_acc_reg(void)
 {
-   return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, 
-                      BRW_ARF_ACCUMULATOR, 
-                      0);
+   return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                       BRW_ARF_ACCUMULATOR,
+                       0);
 }
 
-static INLINE struct brw_reg brw_notification_1_reg(void)
+static inline struct brw_reg brw_notification_1_reg(void)
 {
 
    return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
-                 BRW_ARF_NOTIFICATION_COUNT,
-                 1,
-                 BRW_REGISTER_TYPE_UD,
-                 BRW_VERTICAL_STRIDE_0,
-                 BRW_WIDTH_1,
-                 BRW_HORIZONTAL_STRIDE_0,
-                 BRW_SWIZZLE_XXXX,
-                 WRITEMASK_X);
+                  BRW_ARF_NOTIFICATION_COUNT,
+                  1,
+                  BRW_REGISTER_TYPE_UD,
+                  BRW_VERTICAL_STRIDE_0,
+                  BRW_WIDTH_1,
+                  BRW_HORIZONTAL_STRIDE_0,
+                  BRW_SWIZZLE_XXXX,
+                  WRITEMASK_X);
 }
 
 
-static INLINE struct brw_reg brw_flag_reg( void )
+static inline struct brw_reg brw_flag_reg(void)
 {
    return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
-                     BRW_ARF_FLAG,
-                     0);
+                      BRW_ARF_FLAG,
+                      0);
 }
 
 
-static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
+static inline struct brw_reg brw_mask_reg(uint32_t subnr)
 {
    return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
-                     BRW_ARF_MASK,
-                     subnr);
+                      BRW_ARF_MASK,
+                      subnr);
 }
 
-static INLINE struct brw_reg brw_message_reg( GLuint nr )
+static inline struct brw_reg brw_message_reg(uint32_t nr)
 {
    assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
    return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
-                      nr,
-                      0);
+                       nr,
+                       0);
 }
 
-
-
-
 /* This is almost always called with a numeric constant argument, so
  * make things easy to evaluate at compile time:
  */
-static INLINE GLuint cvt( GLuint val )
+static inline uint32_t cvt(uint32_t val)
 {
    switch (val) {
    case 0: return 0;
@@ -619,10 +586,10 @@ static INLINE GLuint cvt( GLuint val )
    return 0;
 }
 
-static INLINE struct brw_reg stride( struct brw_reg reg,
-                                      GLuint vstride,
-                                      GLuint width,
-                                      GLuint hstride )
+static inline struct brw_reg stride(struct brw_reg reg,
+                                    uint32_t vstride,
+                                    uint32_t width,
+                                    uint32_t hstride)
 {
    reg.vstride = cvt(vstride);
    reg.width = cvt(width) - 1;
@@ -631,103 +598,98 @@ static INLINE struct brw_reg stride( struct brw_reg reg,
 }
 
 
-static INLINE struct brw_reg vec16( struct brw_reg reg )
+static inline struct brw_reg vec16(struct brw_reg reg)
 {
    return stride(reg, 16,16,1);
 }
 
-static INLINE struct brw_reg vec8( struct brw_reg reg )
+static inline struct brw_reg vec8(struct brw_reg reg)
 {
    return stride(reg, 8,8,1);
 }
 
-static INLINE struct brw_reg vec4( struct brw_reg reg )
+static inline struct brw_reg vec4(struct brw_reg reg)
 {
    return stride(reg, 4,4,1);
 }
 
-static INLINE struct brw_reg vec2( struct brw_reg reg )
+static inline struct brw_reg vec2(struct brw_reg reg)
 {
    return stride(reg, 2,2,1);
 }
 
-static INLINE struct brw_reg vec1( struct brw_reg reg )
+static inline struct brw_reg vec1(struct brw_reg reg)
 {
    return stride(reg, 0,1,0);
 }
 
-
-static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt )
+static inline struct brw_reg get_element(struct brw_reg reg, uint32_t elt)
 {
    return vec1(suboffset(reg, elt));
 }
 
-static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt )
+static inline struct brw_reg get_element_ud(struct brw_reg reg, uint32_t elt)
 {
    return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
 }
 
-static INLINE struct brw_reg get_element_d( struct brw_reg reg, GLuint elt )
+static inline struct brw_reg get_element_d(struct brw_reg reg, uint32_t elt)
 {
    return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_D), elt));
 }
 
-
-static INLINE struct brw_reg brw_swizzle( struct brw_reg reg,
-                                           GLuint x,
-                                           GLuint y, 
-                                           GLuint z,
-                                           GLuint w)
+static inline struct brw_reg brw_swizzle(struct brw_reg reg,
+                                         uint32_t x,
+                                         uint32_t y,
+                                         uint32_t z,
+                                         uint32_t w)
 {
    assert(reg.file != BRW_IMMEDIATE_VALUE);
 
    reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
-                                      BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
-                                      BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
-                                      BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
+                                       BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
+                                       BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
+                                       BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
    return reg;
 }
 
 
-static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg,
-                                            GLuint x )
+static inline struct brw_reg brw_swizzle1(struct brw_reg reg,
+                                          uint32_t x)
 {
    return brw_swizzle(reg, x, x, x, x);
 }
 
-static INLINE struct brw_reg brw_writemask( struct brw_reg reg,
-                                             GLuint mask )
+static inline struct brw_reg brw_writemask(struct brw_reg reg,
+                                           uint32_t mask)
 {
    assert(reg.file != BRW_IMMEDIATE_VALUE);
    reg.dw1.bits.writemask &= mask;
    return reg;
 }
 
-static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg,
-                                                 GLuint mask )
+static inline struct brw_reg brw_set_writemask(struct brw_reg reg, uint32_t mask)
 {
    assert(reg.file != BRW_IMMEDIATE_VALUE);
    reg.dw1.bits.writemask = mask;
    return reg;
 }
 
-static INLINE struct brw_reg negate( struct brw_reg reg )
+static inline struct brw_reg negate(struct brw_reg reg)
 {
    reg.negate ^= 1;
    return reg;
 }
 
-static INLINE struct brw_reg brw_abs( struct brw_reg reg )
+static inline struct brw_reg brw_abs(struct brw_reg reg)
 {
    reg.abs = 1;
    reg.negate = 0;
    return reg;
 }
 
-/***********************************************************************
- */
-static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr,
-                                                 GLint offset )
+static inline struct brw_reg brw_vec4_indirect(uint32_t subnr,
+                                                  int offset)
 {
    struct brw_reg reg =  brw_vec4_grf(0, 0);
    reg.subnr = subnr;
@@ -736,8 +698,7 @@ static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr,
    return reg;
 }
 
-static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr,
-                                                 GLint offset )
+static inline struct brw_reg brw_vec1_indirect(uint32_t subnr, int offset)
 {
    struct brw_reg reg =  brw_vec1_grf(0, 0);
    reg.subnr = subnr;
@@ -746,48 +707,48 @@ static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr,
    return reg;
 }
 
-static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset)
+static inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset)
 {
    return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
 }
 
-static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset)
+static inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset)
 {
    return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
 }
 
-static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset)
+static inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset)
 {
    return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
 }
 
-static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset)
+static inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset)
 {
    return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
 }
 
-static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset)
+static inline struct brw_reg deref_1d(struct brw_indirect ptr, int offset)
 {
    return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
 }
 
-static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset)
+static inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset)
 {
    return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
 }
 
-static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr)
+static inline struct brw_reg get_addr_reg(struct brw_indirect ptr)
 {
    return brw_address_reg(ptr.addr_subnr);
 }
 
-static INLINE struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset )
+static inline struct brw_indirect brw_indirect_offset(struct brw_indirect ptr, int offset)
 {
    ptr.addr_offset += offset;
    return ptr;
 }
 
-static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset )
+static inline struct brw_indirect brw_indirect(uint32_t addr_subnr, int offset)
 {
    struct brw_indirect ptr;
    ptr.addr_subnr = addr_subnr;
@@ -797,62 +758,62 @@ static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset
 }
 
 /** Do two brw_regs refer to the same register? */
-static INLINE bool
+static inline bool
 brw_same_reg(struct brw_reg r1, struct brw_reg r2)
 {
    return r1.file == r2.file && r1.nr == r2.nr;
 }
 
-static INLINE struct brw_instruction *current_insn( struct brw_compile *p)
+static inline struct brw_instruction *current_insn(struct brw_compile *p)
 {
    return &p->store[p->nr_insn];
 }
 
-void brw_pop_insn_state( struct brw_compile *p );
-void brw_push_insn_state( struct brw_compile *p );
-void brw_set_mask_control( struct brw_compile *p, GLuint value );
-void brw_set_saturate( struct brw_compile *p, GLuint value );
-void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
+void brw_pop_insn_state(struct brw_compile *p);
+void brw_push_insn_state(struct brw_compile *p);
+void brw_set_mask_control(struct brw_compile *p, uint32_t value);
+void brw_set_saturate(struct brw_compile *p, uint32_t value);
+void brw_set_access_mode(struct brw_compile *p, uint32_t access_mode);
 void brw_set_compression_control(struct brw_compile *p, enum brw_compression c);
-void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
-void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
+void brw_set_predicate_control_flag_value(struct brw_compile *p, uint32_t value);
+void brw_set_predicate_control(struct brw_compile *p, uint32_t pc);
 void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse);
-void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
-void brw_set_acc_write_control(struct brw_compile *p, GLuint value);
+void brw_set_conditionalmod(struct brw_compile *p, uint32_t conditional);
+void brw_set_acc_write_control(struct brw_compile *p, uint32_t value);
 
 void brw_init_compile(struct brw_context *, struct brw_compile *p,
-                     void *mem_ctx);
-const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
+                      void *mem_ctx);
+const uint32_t *brw_get_program(struct brw_compile *p, uint32_t *sz);
 
-struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode);
+struct brw_instruction *brw_next_insn(struct brw_compile *p, uint32_t opcode);
 void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
-                 struct brw_reg dest);
+                  struct brw_reg dest);
 void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
-                 struct brw_reg reg);
+                  struct brw_reg reg);
 
 void gen6_resolve_implied_move(struct brw_compile *p,
-                              struct brw_reg *src,
-                              GLuint msg_reg_nr);
+                               struct brw_reg *src,
+                               uint32_t msg_reg_nr);
 
 /* Helpers for regular instructions:
  */
-#define ALU1(OP)                                       \
-struct brw_instruction *brw_##OP(struct brw_compile *p,        \
-             struct brw_reg dest,                      \
-             struct brw_reg src0);
-
-#define ALU2(OP)                                       \
-struct brw_instruction *brw_##OP(struct brw_compile *p,        \
-             struct brw_reg dest,                      \
-             struct brw_reg src0,                      \
-             struct brw_reg src1);
-
-#define ALU3(OP)                                       \
-struct brw_instruction *brw_##OP(struct brw_compile *p,        \
-             struct brw_reg dest,                      \
-             struct brw_reg src0,                      \
-             struct brw_reg src1,                      \
-             struct brw_reg src2);
+#define ALU1(OP)                                          \
+struct brw_instruction *brw_##OP(struct brw_compile *p,   \
+              struct brw_reg dest,                        \
+              struct brw_reg src0);
+
+#define ALU2(OP)                                          \
+struct brw_instruction *brw_##OP(struct brw_compile *p,   \
+              struct brw_reg dest,                        \
+              struct brw_reg src0,                        \
+              struct brw_reg src1);
+
+#define ALU3(OP)                                          \
+struct brw_instruction *brw_##OP(struct brw_compile *p,   \
+              struct brw_reg dest,                        \
+              struct brw_reg src0,                        \
+              struct brw_reg src1,                        \
+              struct brw_reg src2);
 
 #define ROUND(OP) \
 void brw_##OP(struct brw_compile *p, struct brw_reg dest, struct brw_reg src0);
@@ -893,176 +854,101 @@ ROUND(RNDE)
 #undef ROUND
 
 
-/* Helpers for SEND instruction:
- */
+/* Helpers for SEND instruction */
 void brw_set_sampler_message(struct brw_compile *p,
                              struct brw_instruction *insn,
-                             GLuint binding_table_index,
-                             GLuint sampler,
-                             GLuint msg_type,
-                             GLuint response_length,
-                             GLuint msg_length,
-                             GLuint header_present,
-                             GLuint simd_mode,
-                             GLuint return_format);
+                             uint32_t binding_table_index,
+                             uint32_t sampler,
+                             uint32_t msg_type,
+                             uint32_t response_length,
+                             uint32_t msg_length,
+                             uint32_t header_present,
+                             uint32_t simd_mode,
+                             uint32_t return_format);
 
 void brw_set_dp_read_message(struct brw_compile *p,
-                            struct brw_instruction *insn,
-                            GLuint binding_table_index,
-                            GLuint msg_control,
-                            GLuint msg_type,
-                            GLuint target_cache,
-                            GLuint msg_length,
-                            GLuint response_length);
+                             struct brw_instruction *insn,
+                             uint32_t binding_table_index,
+                             uint32_t msg_control,
+                             uint32_t msg_type,
+                             uint32_t target_cache,
+                             uint32_t msg_length,
+                             uint32_t response_length);
 
 void brw_set_dp_write_message(struct brw_compile *p,
-                             struct brw_instruction *insn,
-                             GLuint binding_table_index,
-                             GLuint msg_control,
-                             GLuint msg_type,
-                             GLuint msg_length,
-                             bool header_present,
-                             GLuint last_render_target,
-                             GLuint response_length,
-                             GLuint end_of_thread,
-                             GLuint send_commit_msg);
-
-void brw_urb_WRITE(struct brw_compile *p,
-                  struct brw_reg dest,
-                  GLuint msg_reg_nr,
-                  struct brw_reg src0,
-                  bool allocate,
-                  bool used,
-                  GLuint msg_length,
-                  GLuint response_length,
-                  bool eot,
-                  bool writes_complete,
-                  GLuint offset,
-                  GLuint swizzle);
-
-void brw_ff_sync(struct brw_compile *p,
-                  struct brw_reg dest,
-                  GLuint msg_reg_nr,
-                  struct brw_reg src0,
-                  bool allocate,
-                  GLuint response_length,
-                  bool eot);
-
-void brw_svb_write(struct brw_compile *p,
-                   struct brw_reg dest,
-                   GLuint msg_reg_nr,
-                   struct brw_reg src0,
-                   GLuint binding_table_index,
-                   bool   send_commit_msg);
-
-void brw_fb_WRITE(struct brw_compile *p,
-                 int dispatch_width,
-                  GLuint msg_reg_nr,
-                  struct brw_reg src0,
-                  GLuint binding_table_index,
-                  GLuint msg_length,
-                  GLuint response_length,
-                  bool eot,
-                  bool header_present);
+                              struct brw_instruction *insn,
+                              uint32_t binding_table_index,
+                              uint32_t msg_control,
+                              uint32_t msg_type,
+                              uint32_t msg_length,
+                              bool header_present,
+                              uint32_t last_render_target,
+                              uint32_t response_length,
+                              uint32_t end_of_thread,
+                              uint32_t send_commit_msg);
 
 void brw_SAMPLE(struct brw_compile *p,
-               struct brw_reg dest,
-               GLuint msg_reg_nr,
-               struct brw_reg src0,
-               GLuint binding_table_index,
-               GLuint sampler,
-               GLuint writemask,
-               GLuint msg_type,
-               GLuint response_length,
-               GLuint msg_length,
-               GLuint header_present,
-               GLuint simd_mode,
-               GLuint return_format);
-
-void brw_math_16( struct brw_compile *p,
-                 struct brw_reg dest,
-                 GLuint function,
-                 GLuint saturate,
-                 GLuint msg_reg_nr,
-                 struct brw_reg src,
-                 GLuint precision );
-
-void brw_math( struct brw_compile *p,
-              struct brw_reg dest,
-              GLuint function,
-              GLuint saturate,
-              GLuint msg_reg_nr,
-              struct brw_reg src,
-              GLuint data_type,
-              GLuint precision );
+                struct brw_reg dest,
+                uint32_t msg_reg_nr,
+                struct brw_reg src0,
+                uint32_t binding_table_index,
+                uint32_t sampler,
+                uint32_t writemask,
+                uint32_t msg_type,
+                uint32_t response_length,
+                uint32_t msg_length,
+                uint32_t header_present,
+                uint32_t simd_mode,
+                uint32_t return_format);
+
+void brw_math_16(struct brw_compile *p,
+                 struct brw_reg dest,
+                 uint32_t function,
+                 uint32_t saturate,
+                 uint32_t msg_reg_nr,
+                 struct brw_reg src,
+                 uint32_t precision);
+
+void brw_math(struct brw_compile *p,
+               struct brw_reg dest,
+               uint32_t function,
+               uint32_t saturate,
+               uint32_t msg_reg_nr,
+               struct brw_reg src,
+               uint32_t data_type,
+               uint32_t precision);
 
 void brw_math2(struct brw_compile *p,
-              struct brw_reg dest,
-              GLuint function,
-              struct brw_reg src0,
-              struct brw_reg src1);
+               struct brw_reg dest,
+               uint32_t function,
+               struct brw_reg src0,
+               struct brw_reg src1);
 
 void brw_oword_block_read(struct brw_compile *p,
-                         struct brw_reg dest,
-                         struct brw_reg mrf,
-                         uint32_t offset,
-                         uint32_t bind_table_index);
+                          struct brw_reg dest,
+                          struct brw_reg mrf,
+                          uint32_t offset,
+                          uint32_t bind_table_index);
 
 void brw_oword_block_read_scratch(struct brw_compile *p,
-                                 struct brw_reg dest,
-                                 struct brw_reg mrf,
-                                 int num_regs,
-                                 GLuint offset);
+                                  struct brw_reg dest,
+                                  struct brw_reg mrf,
+                                  int num_regs,
+                                  uint32_t offset);
 
 void brw_oword_block_write_scratch(struct brw_compile *p,
-                                  struct brw_reg mrf,
-                                  int num_regs,
-                                  GLuint offset);
+                                   struct brw_reg mrf,
+                                   int num_regs,
+                                   uint32_t offset);
 
 void brw_dword_scattered_read(struct brw_compile *p,
-                             struct brw_reg dest,
-                             struct brw_reg mrf,
-                             uint32_t bind_table_index);
-
-void brw_dp_READ_4_vs( struct brw_compile *p,
-                       struct brw_reg dest,
-                       GLuint location,
-                       GLuint bind_table_index );
-
-void brw_dp_READ_4_vs_relative(struct brw_compile *p,
-                              struct brw_reg dest,
-                              struct brw_reg addrReg,
-                              GLuint offset,
-                              GLuint bind_table_index);
-
-/* If/else/endif.  Works by manipulating the execution flags on each
- * channel.
- */
-struct brw_instruction *brw_IF(struct brw_compile *p, 
-                              GLuint execute_size);
-struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
-                               struct brw_reg src0, struct brw_reg src1);
-
-void brw_ELSE(struct brw_compile *p);
-void brw_ENDIF(struct brw_compile *p);
-
-/* DO/WHILE loops:
- */
-struct brw_instruction *brw_DO(struct brw_compile *p,
-                              GLuint execute_size);
-
-struct brw_instruction *brw_WHILE(struct brw_compile *p);
-
-struct brw_instruction *brw_BREAK(struct brw_compile *p);
-struct brw_instruction *brw_CONT(struct brw_compile *p);
-struct brw_instruction *gen6_CONT(struct brw_compile *p);
-struct brw_instruction *gen6_HALT(struct brw_compile *p);
+                              struct brw_reg dest,
+                              struct brw_reg mrf,
+                              uint32_t bind_table_index);
 /* Forward jumps:
  */
 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx);
 
-
-
 void brw_NOP(struct brw_compile *p);
 
 void brw_WAIT(struct brw_compile *p);
@@ -1071,53 +957,48 @@ void brw_WAIT(struct brw_compile *p);
  * taken from src0:
  */
 void brw_CMP(struct brw_compile *p,
-            struct brw_reg dest,
-            GLuint conditional,
-            struct brw_reg src0,
-            struct brw_reg src1);
+             struct brw_reg dest,
+             uint32_t conditional,
+             struct brw_reg src0,
+             struct brw_reg src1);
 
-void brw_print_reg( struct brw_reg reg );
-
-
-/*********************************************************************** 
- * brw_eu_util.c:
- */
+void brw_print_reg(struct brw_reg reg);
 
 void brw_copy_indirect_to_indirect(struct brw_compile *p,
-                                  struct brw_indirect dst_ptr,
-                                  struct brw_indirect src_ptr,
-                                  GLuint count);
+                                   struct brw_indirect dst_ptr,
+                                   struct brw_indirect src_ptr,
+                                   uint32_t count);
 
 void brw_copy_from_indirect(struct brw_compile *p,
-                           struct brw_reg dst,
-                           struct brw_indirect ptr,
-                           GLuint count);
+                            struct brw_reg dst,
+                            struct brw_indirect ptr,
+                            uint32_t count);
 
 void brw_copy4(struct brw_compile *p,
-              struct brw_reg dst,
-              struct brw_reg src,
-              GLuint count);
+               struct brw_reg dst,
+               struct brw_reg src,
+               uint32_t count);
 
 void brw_copy8(struct brw_compile *p,
-              struct brw_reg dst,
-              struct brw_reg src,
-              GLuint count);
+               struct brw_reg dst,
+               struct brw_reg src,
+               uint32_t count);
 
-void brw_math_invert( struct brw_compile *p, 
-                     struct brw_reg dst,
-                     struct brw_reg src);
+void brw_math_invert(struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg src);
 
 void brw_set_src1(struct brw_compile *p,
-                 struct brw_instruction *insn,
-                 struct brw_reg reg);
+                  struct brw_instruction *insn,
+                  struct brw_reg reg);
 
 void brw_set_uip_jip(struct brw_compile *p);
 
 uint32_t brw_swap_cmod(uint32_t cmod);
 
-/* brw_optimize.c */
-void brw_optimize(struct brw_compile *p);
-void brw_remove_duplicate_mrf_moves(struct brw_compile *p);
-void brw_remove_grf_to_mrf_moves(struct brw_compile *p);
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* BRW_EU_H */
 
-#endif
index 210b058..fe0c703 100644 (file)
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-     
 
-#include "brw_context.h"
+// #include "brw_context.h"
 #include "brw_defines.h"
 #include "brw_eu.h"
 
-#include "glsl/ralloc.h"
+#include <string.h>
+
+#define Elements(x) (sizeof(x) / sizeof(*(x)))
 
 /***********************************************************************
  * Internal helper for constructing instructions
  */
 
 static void guess_execution_size(struct brw_compile *p,
-                                struct brw_instruction *insn,
-                                struct brw_reg reg)
+                                 struct brw_instruction *insn,
+                                 struct brw_reg reg)
 {
    if (reg.width == BRW_WIDTH_8 && p->compressed)
       insn->header.execution_size = BRW_EXECUTE_16;
    else
-      insn->header.execution_size = reg.width; /* note - definitions are compatible */
+      insn->header.execution_size = reg.width;        /* note - definitions are compatible */
 }
 
 
@@ -53,11 +54,10 @@ static void guess_execution_size(struct brw_compile *p,
  */
 void
 gen6_resolve_implied_move(struct brw_compile *p,
-                         struct brw_reg *src,
-                         GLuint msg_reg_nr)
+                          struct brw_reg *src,
+                          uint32_t msg_reg_nr)
 {
-   struct intel_context *intel = &p->brw->intel;
-   if (intel->gen < 6)
+   if (p->gen < 6)
       return;
 
    if (src->file == BRW_MESSAGE_REGISTER_FILE)
@@ -68,7 +68,7 @@ gen6_resolve_implied_move(struct brw_compile *p,
       brw_set_mask_control(p, BRW_MASK_DISABLE);
       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
       brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
-             retype(*src, BRW_REGISTER_TYPE_UD));
+              retype(*src, BRW_REGISTER_TYPE_UD));
       brw_pop_insn_state(p);
    }
    *src = brw_message_reg(msg_reg_nr);
@@ -85,8 +85,7 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
     * Since we're pretending to have 16 MRFs anyway, we may as well use the
     * registers required for messages with EOT.
     */
-   struct intel_context *intel = &p->brw->intel;
-   if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
+   if (p->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
       reg->file = BRW_GENERAL_REGISTER_FILE;
       reg->nr += GEN7_MRF_HACK_START;
    }
@@ -95,7 +94,7 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
 
 void
 brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
-            struct brw_reg dest)
+             struct brw_reg dest)
 {
    if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
        dest.file != BRW_MESSAGE_REGISTER_FILE)
@@ -111,16 +110,16 @@ brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
       insn->bits1.da1.dest_reg_nr = dest.nr;
 
       if (insn->header.access_mode == BRW_ALIGN_1) {
-        insn->bits1.da1.dest_subreg_nr = dest.subnr;
-        if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
-           dest.hstride = BRW_HORIZONTAL_STRIDE_1;
-        insn->bits1.da1.dest_horiz_stride = dest.hstride;
+         insn->bits1.da1.dest_subreg_nr = dest.subnr;
+         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+         insn->bits1.da1.dest_horiz_stride = dest.hstride;
       }
       else {
-        insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
-        insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
-        /* even ignored in da16, still need to set as '01' */
-        insn->bits1.da16.dest_horiz_stride = 1;
+         insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
+         insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+         /* even ignored in da16, still need to set as '01' */
+         insn->bits1.da16.dest_horiz_stride = 1;
       }
    }
    else {
@@ -129,15 +128,15 @@ brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
       /* These are different sizes in align1 vs align16:
        */
       if (insn->header.access_mode == BRW_ALIGN_1) {
-        insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
-        if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
-           dest.hstride = BRW_HORIZONTAL_STRIDE_1;
-        insn->bits1.ia1.dest_horiz_stride = dest.hstride;
+         insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+         insn->bits1.ia1.dest_horiz_stride = dest.hstride;
       }
       else {
-        insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
-        /* even ignored in da16, still need to set as '01' */
-        insn->bits1.ia16.dest_horiz_stride = 1;
+         insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+         /* even ignored in da16, still need to set as '01' */
+         insn->bits1.ia16.dest_horiz_stride = 1;
       }
    }
 
@@ -164,8 +163,8 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg)
        * destination horiz stride has to be a word.
        */
       if (reg.type == BRW_REGISTER_TYPE_V) {
-        assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
-               reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
+         assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
+                reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
       }
 
       return;
@@ -189,7 +188,7 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg)
    width = width_for_reg[reg.width];
 
    assert(insn->header.execution_size >= 0 &&
-         insn->header.execution_size < Elements(execsize_for_reg));
+          insn->header.execution_size < Elements(execsize_for_reg));
    execsize = execsize_for_reg[insn->header.execution_size];
 
    /* Restrictions from 3.3.10: Register Region Restrictions. */
@@ -227,7 +226,7 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg)
 
 void
 brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
-            struct brw_reg reg)
+             struct brw_reg reg)
 {
    if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
@@ -253,60 +252,60 @@ brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
    else 
    {
       if (reg.address_mode == BRW_ADDRESS_DIRECT) {
-        if (insn->header.access_mode == BRW_ALIGN_1) {
-           insn->bits2.da1.src0_subreg_nr = reg.subnr;
-           insn->bits2.da1.src0_reg_nr = reg.nr;
-        }
-        else {
-           insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
-           insn->bits2.da16.src0_reg_nr = reg.nr;
-        }
+         if (insn->header.access_mode == BRW_ALIGN_1) {
+            insn->bits2.da1.src0_subreg_nr = reg.subnr;
+            insn->bits2.da1.src0_reg_nr = reg.nr;
+         }
+         else {
+            insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
+            insn->bits2.da16.src0_reg_nr = reg.nr;
+         }
       }
       else {
-        insn->bits2.ia1.src0_subreg_nr = reg.subnr;
-
-        if (insn->header.access_mode == BRW_ALIGN_1) {
-           insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 
-        }
-        else {
-           insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
-        }
+         insn->bits2.ia1.src0_subreg_nr = reg.subnr;
+
+         if (insn->header.access_mode == BRW_ALIGN_1) {
+            insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 
+         }
+         else {
+            insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
+         }
       }
 
       if (insn->header.access_mode == BRW_ALIGN_1) {
-        if (reg.width == BRW_WIDTH_1 && 
-            insn->header.execution_size == BRW_EXECUTE_1) {
-           insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
-           insn->bits2.da1.src0_width = BRW_WIDTH_1;
-           insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
-        }
-        else {
-           insn->bits2.da1.src0_horiz_stride = reg.hstride;
-           insn->bits2.da1.src0_width = reg.width;
-           insn->bits2.da1.src0_vert_stride = reg.vstride;
-        }
+         if (reg.width == BRW_WIDTH_1 && 
+             insn->header.execution_size == BRW_EXECUTE_1) {
+            insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+            insn->bits2.da1.src0_width = BRW_WIDTH_1;
+            insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
+         }
+         else {
+            insn->bits2.da1.src0_horiz_stride = reg.hstride;
+            insn->bits2.da1.src0_width = reg.width;
+            insn->bits2.da1.src0_vert_stride = reg.vstride;
+         }
       }
       else {
-        insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
-        insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
-        insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
-        insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
-
-        /* This is an oddity of the fact we're using the same
-         * descriptions for registers in align_16 as align_1:
-         */
-        if (reg.vstride == BRW_VERTICAL_STRIDE_8)
-           insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
-        else
-           insn->bits2.da16.src0_vert_stride = reg.vstride;
+         insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+         insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+         insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+         insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+         /* This is an oddity of the fact we're using the same
+          * descriptions for registers in align_16 as align_1:
+          */
+         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+            insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
+         else
+            insn->bits2.da16.src0_vert_stride = reg.vstride;
       }
    }
 }
 
 
 void brw_set_src1(struct brw_compile *p,
-                 struct brw_instruction *insn,
-                 struct brw_reg reg)
+                  struct brw_instruction *insn,
+                  struct brw_reg reg)
 {
    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
 
@@ -336,40 +335,40 @@ void brw_set_src1(struct brw_compile *p,
       /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
 
       if (insn->header.access_mode == BRW_ALIGN_1) {
-        insn->bits3.da1.src1_subreg_nr = reg.subnr;
-        insn->bits3.da1.src1_reg_nr = reg.nr;
+         insn->bits3.da1.src1_subreg_nr = reg.subnr;
+         insn->bits3.da1.src1_reg_nr = reg.nr;
       }
       else {
-        insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
-        insn->bits3.da16.src1_reg_nr = reg.nr;
+         insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
+         insn->bits3.da16.src1_reg_nr = reg.nr;
       }
 
       if (insn->header.access_mode == BRW_ALIGN_1) {
-        if (reg.width == BRW_WIDTH_1 && 
-            insn->header.execution_size == BRW_EXECUTE_1) {
-           insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
-           insn->bits3.da1.src1_width = BRW_WIDTH_1;
-           insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
-        }
-        else {
-           insn->bits3.da1.src1_horiz_stride = reg.hstride;
-           insn->bits3.da1.src1_width = reg.width;
-           insn->bits3.da1.src1_vert_stride = reg.vstride;
-        }
+         if (reg.width == BRW_WIDTH_1 && 
+             insn->header.execution_size == BRW_EXECUTE_1) {
+            insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+            insn->bits3.da1.src1_width = BRW_WIDTH_1;
+            insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
+         }
+         else {
+            insn->bits3.da1.src1_horiz_stride = reg.hstride;
+            insn->bits3.da1.src1_width = reg.width;
+            insn->bits3.da1.src1_vert_stride = reg.vstride;
+         }
       }
       else {
-        insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
-        insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
-        insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
-        insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
-
-        /* This is an oddity of the fact we're using the same
-         * descriptions for registers in align_16 as align_1:
-         */
-        if (reg.vstride == BRW_VERTICAL_STRIDE_8)
-           insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
-        else
-           insn->bits3.da16.src1_vert_stride = reg.vstride;
+         insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+         insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+         insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+         insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+         /* This is an oddity of the fact we're using the same
+          * descriptions for registers in align_16 as align_1:
+          */
+         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+            insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
+         else
+            insn->bits3.da16.src1_vert_stride = reg.vstride;
       }
    }
 }
@@ -384,30 +383,28 @@ void brw_set_src1(struct brw_compile *p,
  */
 static void
 brw_set_message_descriptor(struct brw_compile *p,
-                          struct brw_instruction *inst,
-                          enum brw_message_target sfid,
-                          unsigned msg_length,
-                          unsigned response_length,
-                          bool header_present,
-                          bool end_of_thread)
+                           struct brw_instruction *inst,
+                           enum brw_message_target sfid,
+                           unsigned msg_length,
+                           unsigned response_length,
+                           bool header_present,
+                           bool end_of_thread)
 {
-   struct intel_context *intel = &p->brw->intel;
-
    brw_set_src1(p, inst, brw_imm_d(0));
 
-   if (intel->gen >= 5) {
+   if (p->gen >= 5) {
       inst->bits3.generic_gen5.header_present = header_present;
       inst->bits3.generic_gen5.response_length = response_length;
       inst->bits3.generic_gen5.msg_length = msg_length;
       inst->bits3.generic_gen5.end_of_thread = end_of_thread;
 
-      if (intel->gen >= 6) {
-        /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
-        inst->header.destreg__conditionalmod = sfid;
+      if (p->gen >= 6) {
+         /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
+         inst->header.destreg__conditionalmod = sfid;
       } else {
-        /* Set Extended Message Descriptor (ex_desc) */
-        inst->bits2.send_gen5.sfid = sfid;
-        inst->bits2.send_gen5.end_of_thread = end_of_thread;
+         /* Set Extended Message Descriptor (ex_desc) */
+         inst->bits2.send_gen5.sfid = sfid;
+         inst->bits2.send_gen5.end_of_thread = end_of_thread;
       }
    } else {
       inst->bits3.generic.response_length = response_length;
@@ -417,16 +414,14 @@ brw_set_message_descriptor(struct brw_compile *p,
    }
 }
 
-static void brw_set_math_message( struct brw_compile *p,
-                                 struct brw_instruction *insn,
-                                 GLuint function,
-                                 GLuint integer_type,
-                                 bool low_precision,
-                                 bool saturate,
-                                 GLuint dataType )
+static void brw_set_math_message(struct brw_compile *p,
+                                 struct brw_instruction *insn,
+                                 uint32_t function,
+                                 uint32_t integer_type,
+                                 bool low_precision,
+                                 bool saturate,
+                                 uint32_t dataType)
 {
-   struct brw_context *brw = p->brw;
-   struct intel_context *intel = &brw->intel;
    unsigned msg_length;
    unsigned response_length;
 
@@ -455,8 +450,8 @@ static void brw_set_math_message( struct brw_compile *p,
    }
 
    brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
-                             msg_length, response_length, false, false);
-   if (intel->gen == 5) {
+                              msg_length, response_length, false, false);
+   if (p->gen == 5) {
       insn->bits3.math_gen5.function = function;
       insn->bits3.math_gen5.int_type = integer_type;
       insn->bits3.math_gen5.precision = low_precision;
@@ -472,236 +467,92 @@ static void brw_set_math_message( struct brw_compile *p,
    }
 }
 
-
-static void brw_set_ff_sync_message(struct brw_compile *p,
-                                   struct brw_instruction *insn,
-                                   bool allocate,
-                                   GLuint response_length,
-                                   bool end_of_thread)
-{
-   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
-                             1, response_length, true, end_of_thread);
-   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
-   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
-   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
-   insn->bits3.urb_gen5.allocate = allocate;
-   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
-   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
-}
-
-static void brw_set_urb_message( struct brw_compile *p,
-                                struct brw_instruction *insn,
-                                bool allocate,
-                                bool used,
-                                GLuint msg_length,
-                                GLuint response_length,
-                                bool end_of_thread,
-                                bool complete,
-                                GLuint offset,
-                                GLuint swizzle_control )
-{
-   struct brw_context *brw = p->brw;
-   struct intel_context *intel = &brw->intel;
-
-   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
-                             msg_length, response_length, true, end_of_thread);
-   if (intel->gen == 7) {
-      insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */
-      insn->bits3.urb_gen7.offset = offset;
-      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
-      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
-      /* per_slot_offset = 0 makes it ignore offsets in message header */
-      insn->bits3.urb_gen7.per_slot_offset = 0;
-      insn->bits3.urb_gen7.complete = complete;
-   } else if (intel->gen >= 5) {
-      insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
-      insn->bits3.urb_gen5.offset = offset;
-      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
-      insn->bits3.urb_gen5.allocate = allocate;
-      insn->bits3.urb_gen5.used = used;        /* ? */
-      insn->bits3.urb_gen5.complete = complete;
-   } else {
-      insn->bits3.urb.opcode = 0;      /* ? */
-      insn->bits3.urb.offset = offset;
-      insn->bits3.urb.swizzle_control = swizzle_control;
-      insn->bits3.urb.allocate = allocate;
-      insn->bits3.urb.used = used;     /* ? */
-      insn->bits3.urb.complete = complete;
-   }
-}
-
 void
 brw_set_dp_write_message(struct brw_compile *p,
-                        struct brw_instruction *insn,
-                        GLuint binding_table_index,
-                        GLuint msg_control,
-                        GLuint msg_type,
-                        GLuint msg_length,
-                        bool header_present,
-                        GLuint last_render_target,
-                        GLuint response_length,
-                        GLuint end_of_thread,
-                        GLuint send_commit_msg)
+                         struct brw_instruction *insn, /* data-port write: instruction whose bits3 message descriptor is written */
+                         uint32_t binding_table_index,
+                         uint32_t msg_control,
+                         uint32_t msg_type, /* also selects the SFID below */
+                         uint32_t msg_length,
+                         bool header_present,
+                         uint32_t last_render_target,
+                         uint32_t response_length,
+                         uint32_t end_of_thread,
+                         uint32_t send_commit_msg) /* not read any more: only the removed pre-Gen7 layouts stored it */
 {
-   struct brw_context *brw = p->brw;
-   struct intel_context *intel = &brw->intel;
    unsigned sfid;
 
-   if (intel->gen >= 7) {
-      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
-      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
-        sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
-      else
-        sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
-   } else if (intel->gen == 6) {
-      /* Use the render cache for all write messages. */
-      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
-   } else {
-      sfid = BRW_SFID_DATAPORT_WRITE;
-   }
-
+   /* Former gen >= 7 branch, now unconditional: use the Render Cache for RT writes; otherwise use the Data Cache */
+   if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
+    sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
+   else
+    sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
    brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
-                             header_present, end_of_thread);
-
-   if (intel->gen >= 7) {
-      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
-      insn->bits3.gen7_dp.msg_control = msg_control;
-      insn->bits3.gen7_dp.last_render_target = last_render_target;
-      insn->bits3.gen7_dp.msg_type = msg_type;
-   } else if (intel->gen == 6) {
-      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
-      insn->bits3.gen6_dp.msg_control = msg_control;
-      insn->bits3.gen6_dp.last_render_target = last_render_target;
-      insn->bits3.gen6_dp.msg_type = msg_type;
-      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
-   } else if (intel->gen == 5) {
-      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
-      insn->bits3.dp_write_gen5.msg_control = msg_control;
-      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
-      insn->bits3.dp_write_gen5.msg_type = msg_type;
-      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
-   } else {
-      insn->bits3.dp_write.binding_table_index = binding_table_index;
-      insn->bits3.dp_write.msg_control = msg_control;
-      insn->bits3.dp_write.last_render_target = last_render_target;
-      insn->bits3.dp_write.msg_type = msg_type;
-      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
-   }
+                              header_present, end_of_thread);
+
+   insn->bits3.gen7_dp.binding_table_index = binding_table_index; /* gen7_dp is the only descriptor layout kept */
+   insn->bits3.gen7_dp.msg_control = msg_control;
+   insn->bits3.gen7_dp.last_render_target = last_render_target;
+   insn->bits3.gen7_dp.msg_type = msg_type;
 }
 
 void
 brw_set_dp_read_message(struct brw_compile *p,
-                       struct brw_instruction *insn,
-                       GLuint binding_table_index,
-                       GLuint msg_control,
-                       GLuint msg_type,
-                       GLuint target_cache,
-                       GLuint msg_length,
-                       GLuint response_length)
+                        struct brw_instruction *insn, /* data-port read: instruction whose bits3 message descriptor is written */
+                        uint32_t binding_table_index,
+                        uint32_t msg_control,
+                        uint32_t msg_type,
+                        uint32_t target_cache, /* not read any more: only the removed Gen6-and-older paths used it */
+                        uint32_t msg_length,
+                        uint32_t response_length)
 {
-   struct brw_context *brw = p->brw;
-   struct intel_context *intel = &brw->intel;
    unsigned sfid;
 
-   if (intel->gen >= 7) {
-      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
-   } else if (intel->gen == 6) {
-      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
-        sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
-      else
-        sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
-   } else {
-      sfid = BRW_SFID_DATAPORT_READ;
-   }
-
+   sfid = GEN7_SFID_DATAPORT_DATA_CACHE; /* former gen >= 7 choice, now unconditional */
    brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
-                             true, false);
-
-   if (intel->gen >= 7) {
-      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
-      insn->bits3.gen7_dp.msg_control = msg_control;
-      insn->bits3.gen7_dp.last_render_target = 0;
-      insn->bits3.gen7_dp.msg_type = msg_type;
-   } else if (intel->gen == 6) {
-      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
-      insn->bits3.gen6_dp.msg_control = msg_control;
-      insn->bits3.gen6_dp.last_render_target = 0;
-      insn->bits3.gen6_dp.msg_type = msg_type;
-      insn->bits3.gen6_dp.send_commit_msg = 0;
-   } else if (intel->gen == 5) {
-      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
-      insn->bits3.dp_read_gen5.msg_control = msg_control;
-      insn->bits3.dp_read_gen5.msg_type = msg_type;
-      insn->bits3.dp_read_gen5.target_cache = target_cache;
-   } else if (intel->is_g4x) {
-      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
-      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
-      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
-      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
-   } else {
-      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
-      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
-      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
-      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
-   }
+                              true, false); /* the argument slots where brw_set_dp_write_message passes header_present, end_of_thread */
+
+   insn->bits3.gen7_dp.binding_table_index = binding_table_index; /* gen7_dp is the only descriptor layout kept */
+   insn->bits3.gen7_dp.msg_control = msg_control;
+   insn->bits3.gen7_dp.last_render_target = 0; /* always cleared on the read path */
+   insn->bits3.gen7_dp.msg_type = msg_type;
 }
 
 void
 brw_set_sampler_message(struct brw_compile *p,
                         struct brw_instruction *insn,
-                        GLuint binding_table_index,
-                        GLuint sampler,
-                        GLuint msg_type,
-                        GLuint response_length,
-                        GLuint msg_length,
-                        GLuint header_present,
-                        GLuint simd_mode,
-                        GLuint return_format)
+                        uint32_t binding_table_index, /* sampler message: this and the next two values are stored in bits3.sampler_gen7 */
+                        uint32_t sampler,
+                        uint32_t msg_type,
+                        uint32_t response_length,
+                        uint32_t msg_length,
+                        uint32_t header_present,
+                        uint32_t simd_mode,
+                        uint32_t return_format) /* not read any more: only the removed oldest (non-G4X, pre-Gen5) layout stored it */
 {
-   struct brw_context *brw = p->brw;
-   struct intel_context *intel = &brw->intel;
-
    brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
-                             response_length, header_present, false);
-
-   if (intel->gen >= 7) {
-      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
-      insn->bits3.sampler_gen7.sampler = sampler;
-      insn->bits3.sampler_gen7.msg_type = msg_type;
-      insn->bits3.sampler_gen7.simd_mode = simd_mode;
-   } else if (intel->gen >= 5) {
-      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
-      insn->bits3.sampler_gen5.sampler = sampler;
-      insn->bits3.sampler_gen5.msg_type = msg_type;
-      insn->bits3.sampler_gen5.simd_mode = simd_mode;
-   } else if (intel->is_g4x) {
-      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
-      insn->bits3.sampler_g4x.sampler = sampler;
-      insn->bits3.sampler_g4x.msg_type = msg_type;
-   } else {
-      insn->bits3.sampler.binding_table_index = binding_table_index;
-      insn->bits3.sampler.sampler = sampler;
-      insn->bits3.sampler.msg_type = msg_type;
-      insn->bits3.sampler.return_format = return_format;
-   }
+                              response_length, header_present, false); /* common descriptor fields are set first, with BRW_SFID_SAMPLER as target */
+   insn->bits3.sampler_gen7.binding_table_index = binding_table_index; /* sampler_gen7 is the only descriptor layout kept */
+   insn->bits3.sampler_gen7.sampler = sampler;
+   insn->bits3.sampler_gen7.msg_type = msg_type;
+   insn->bits3.sampler_gen7.simd_mode = simd_mode;
 }
 
-
 #define next_insn brw_next_insn
 struct brw_instruction *
-brw_next_insn(struct brw_compile *p, GLuint opcode)
+brw_next_insn(struct brw_compile *p, uint32_t opcode)
 {
    struct brw_instruction *insn;
-
+   assert(0);
+#if 0
    if (p->nr_insn + 1 > p->store_size) {
-      if (0)
-         printf("incresing the store size to %d\n", p->store_size << 1);
       p->store_size <<= 1;
       p->store = reralloc(p->mem_ctx, p->store,
                           struct brw_instruction, p->store_size);
       if (!p->store)
          assert(!"realloc eu store memeory failed");
    }
+#endif
 
    insn = &p->store[p->nr_insn++];
    memcpy(insn, p->current, sizeof(*insn));
@@ -718,10 +569,10 @@ brw_next_insn(struct brw_compile *p, GLuint opcode)
    return insn;
 }
 
-static struct brw_instruction *brw_alu1( struct brw_compile *p,
-                                        GLuint opcode,
-                                        struct brw_reg dest,
-                                        struct brw_reg src )
+static struct brw_instruction *brw_alu1(struct brw_compile *p,
+                                        uint32_t opcode,
+                                        struct brw_reg dest,
+                                        struct brw_reg src)
 {
    struct brw_instruction *insn = next_insn(p, opcode);
    brw_set_dest(p, insn, dest);
@@ -730,10 +581,10 @@ static struct brw_instruction *brw_alu1( struct brw_compile *p,
 }
 
 static struct brw_instruction *brw_alu2(struct brw_compile *p,
-                                       GLuint opcode,
-                                       struct brw_reg dest,
-                                       struct brw_reg src0,
-                                       struct brw_reg src1 )
+                                        uint32_t opcode,
+                                        struct brw_reg dest,
+                                        struct brw_reg src0,
+                                        struct brw_reg src1)
 {
    struct brw_instruction *insn = next_insn(p, opcode);   
    brw_set_dest(p, insn, dest);
@@ -754,11 +605,11 @@ get_3src_subreg_nr(struct brw_reg reg)
 }
 
 static struct brw_instruction *brw_alu3(struct brw_compile *p,
-                                       GLuint opcode,
-                                       struct brw_reg dest,
-                                       struct brw_reg src0,
-                                       struct brw_reg src1,
-                                       struct brw_reg src2)
+                                        uint32_t opcode,
+                                        struct brw_reg dest,
+                                        struct brw_reg src0,
+                                        struct brw_reg src1,
+                                        struct brw_reg src2)
 {
    struct brw_instruction *insn = next_insn(p, opcode);
 
@@ -767,7 +618,7 @@ static struct brw_instruction *brw_alu3(struct brw_compile *p,
    assert(insn->header.access_mode == BRW_ALIGN_16);
 
    assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
-         dest.file == BRW_MESSAGE_REGISTER_FILE);
+          dest.file == BRW_MESSAGE_REGISTER_FILE);
    assert(dest.nr < 128);
    assert(dest.address_mode == BRW_ADDRESS_DIRECT);
    assert(dest.type = BRW_REGISTER_TYPE_F);
@@ -818,31 +669,32 @@ static struct brw_instruction *brw_alu3(struct brw_compile *p,
 /***********************************************************************
  * Convenience routines.
  */
-#define ALU1(OP)                                       \
-struct brw_instruction *brw_##OP(struct brw_compile *p,        \
-             struct brw_reg dest,                      \
-             struct brw_reg src0)                      \
-{                                                      \
-   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);            \
-}
-
-#define ALU2(OP)                                       \
-struct brw_instruction *brw_##OP(struct brw_compile *p,        \
-             struct brw_reg dest,                      \
-             struct brw_reg src0,                      \
-             struct brw_reg src1)                      \
-{                                                      \
-   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);      \
-}
-
-#define ALU3(OP)                                       \
-struct brw_instruction *brw_##OP(struct brw_compile *p,        \
-             struct brw_reg dest,                      \
-             struct brw_reg src0,                      \
-             struct brw_reg src1,                      \
-             struct brw_reg src2)                      \
-{                                                      \
-   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);        \
+#define ALU1(OP) /* defines brw_<OP>(p, dest, src0), which forwards to brw_alu1 with BRW_OPCODE_<OP> */ \
+struct brw_instruction *brw_##OP(struct brw_compile *p,   \
+              struct brw_reg dest,                        \
+              struct brw_reg src0)                        \
+{                                                         \
+   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);       \
+}
+
+#define ALU2(OP) /* defines brw_<OP>(p, dest, src0, src1), which forwards to brw_alu2 with BRW_OPCODE_<OP> */ \
+struct brw_instruction *brw_##OP(struct brw_compile *p,   \
+              struct brw_reg dest,                        \
+              struct brw_reg src0,                        \
+              struct brw_reg src1)                        \
+{                                                         \
+   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
+}
+
+#define ALU3(OP) /* defines brw_<OP>(p, dest, src0, src1, src2), which forwards to brw_alu3 with BRW_OPCODE_<OP> */ \
+struct brw_instruction *brw_##OP(struct brw_compile *p,   \
+              struct brw_reg dest,                        \
+              struct brw_reg src0,                        \
+              struct brw_reg src1,                        \
+              struct brw_reg src2)                        \
+{                                                         \
+   return brw_alu3(p, BRW_OPCODE_##OP, dest,              \
+                   src0, src1, src2);                     \
 }
 
 /* Rounding operations (other than RNDD) require two instructions - the first
@@ -852,25 +704,17 @@ struct brw_instruction *brw_##OP(struct brw_compile *p,   \
  *
  * Sandybridge and later appear to round correctly without an ADD.
  */
-#define ROUND(OP)                                                            \
-void brw_##OP(struct brw_compile *p,                                         \
-             struct brw_reg dest,                                            \
-             struct brw_reg src)                                             \
-{                                                                            \
-   struct brw_instruction *rnd, *add;                                        \
-   rnd = next_insn(p, BRW_OPCODE_##OP);                                              \
-   brw_set_dest(p, rnd, dest);                                               \
-   brw_set_src0(p, rnd, src);                                                \
-                                                                             \
-   if (p->brw->intel.gen < 6) {                                                      \
-      /* turn on round-increments */                                         \
-      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;               \
-      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                         \
-      add->header.predicate_control = BRW_PREDICATE_NORMAL;                  \
-   }                                                                         \
+#define ROUND(OP) /* defines void brw_<OP>(p, dest, src), which emits one BRW_OPCODE_<OP> instruction */ \
+void brw_##OP(struct brw_compile *p,     \
+              struct brw_reg dest,       \
+              struct brw_reg src)        \
+{                                        \
+   struct brw_instruction *rnd; /* no 'add' any more: the gen < 6 predicated ADD fix-up was removed */ \
+   rnd = next_insn(p, BRW_OPCODE_##OP);  \
+   brw_set_dest(p, rnd, dest);           \
+   brw_set_src0(p, rnd, src);            \
 }
 
-
 ALU1(MOV)
 ALU2(SEL)
 ALU1(NOT)
@@ -898,23 +742,22 @@ ALU3(MAD)
 ROUND(RNDZ)
 ROUND(RNDE)
 
-
 struct brw_instruction *brw_ADD(struct brw_compile *p,
-                               struct brw_reg dest,
-                               struct brw_reg src0,
-                               struct brw_reg src1)
+                                struct brw_reg dest,
+                                struct brw_reg src0,
+                                struct brw_reg src1)
 {
    /* 6.2.2: add */
    if (src0.type == BRW_REGISTER_TYPE_F ||
        (src0.file == BRW_IMMEDIATE_VALUE &&
-       src0.type == BRW_REGISTER_TYPE_VF)) {
+        src0.type == BRW_REGISTER_TYPE_VF)) {
       assert(src1.type != BRW_REGISTER_TYPE_UD);
       assert(src1.type != BRW_REGISTER_TYPE_D);
    }
 
    if (src1.type == BRW_REGISTER_TYPE_F ||
        (src1.file == BRW_IMMEDIATE_VALUE &&
-       src1.type == BRW_REGISTER_TYPE_VF)) {
+        src1.type == BRW_REGISTER_TYPE_VF)) {
       assert(src0.type != BRW_REGISTER_TYPE_UD);
       assert(src0.type != BRW_REGISTER_TYPE_D);
    }
@@ -923,9 +766,9 @@ struct brw_instruction *brw_ADD(struct brw_compile *p,
 }
 
 struct brw_instruction *brw_MUL(struct brw_compile *p,
-                               struct brw_reg dest,
-                               struct brw_reg src0,
-                               struct brw_reg src1)
+                                struct brw_reg dest,
+                                struct brw_reg src0,
+                                struct brw_reg src1)
 {
    /* 6.32.38: mul */
    if (src0.type == BRW_REGISTER_TYPE_D ||
@@ -937,22 +780,22 @@ struct brw_instruction *brw_MUL(struct brw_compile *p,
 
    if (src0.type == BRW_REGISTER_TYPE_F ||
        (src0.file == BRW_IMMEDIATE_VALUE &&
-       src0.type == BRW_REGISTER_TYPE_VF)) {
+        src0.type == BRW_REGISTER_TYPE_VF)) {
       assert(src1.type != BRW_REGISTER_TYPE_UD);
       assert(src1.type != BRW_REGISTER_TYPE_D);
    }
 
    if (src1.type == BRW_REGISTER_TYPE_F ||
        (src1.file == BRW_IMMEDIATE_VALUE &&
-       src1.type == BRW_REGISTER_TYPE_VF)) {
+        src1.type == BRW_REGISTER_TYPE_VF)) {
       assert(src0.type != BRW_REGISTER_TYPE_UD);
       assert(src0.type != BRW_REGISTER_TYPE_D);
    }
 
    assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
-         src0.nr != BRW_ARF_ACCUMULATOR);
+          src0.nr != BRW_ARF_ACCUMULATOR);
    assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
-         src1.nr != BRW_ARF_ACCUMULATOR);
+          src1.nr != BRW_ARF_ACCUMULATOR);
 
    return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
 }
@@ -966,10 +809,6 @@ void brw_NOP(struct brw_compile *p)
    brw_set_src1(p, insn, brw_imm_ud(0x0));
 }
 
-
-
-
-
 /***********************************************************************
  * Comparisons, if/else/endif
  */
@@ -990,611 +829,15 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p,
    return insn;
 }
 
-static void
-push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
-{
-   p->if_stack[p->if_stack_depth] = inst - p->store;
-
-   p->if_stack_depth++;
-   if (p->if_stack_array_size <= p->if_stack_depth) {
-      p->if_stack_array_size *= 2;
-      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
-                            p->if_stack_array_size);
-   }
-}
-
-static struct brw_instruction *
-pop_if_stack(struct brw_compile *p)
-{
-   p->if_stack_depth--;
-   return &p->store[p->if_stack[p->if_stack_depth]];
-}
-
-static void
-push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
-{
-   if (p->loop_stack_array_size < p->loop_stack_depth) {
-      p->loop_stack_array_size *= 2;
-      p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
-                              p->loop_stack_array_size);
-      p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
-                                    p->loop_stack_array_size);
-   }
-
-   p->loop_stack[p->loop_stack_depth] = inst - p->store;
-   p->loop_stack_depth++;
-   p->if_depth_in_loop[p->loop_stack_depth] = 0;
-}
-
-static struct brw_instruction *
-get_inner_do_insn(struct brw_compile *p)
-{
-   return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
-}
-
-/* EU takes the value from the flag register and pushes it onto some
- * sort of a stack (presumably merging with any flag value already on
- * the stack).  Within an if block, the flags at the top of the stack
- * control execution on each channel of the unit, eg. on each of the
- * 16 pixel values in our wm programs.
- *
- * When the matching 'else' instruction is reached (presumably by
- * countdown of the instruction count patched in by our ELSE/ENDIF
- * functions), the relevent flags are inverted.
- *
- * When the matching 'endif' instruction is reached, the flags are
- * popped off.  If the stack is now empty, normal execution resumes.
- */
-struct brw_instruction *
-brw_IF(struct brw_compile *p, GLuint execute_size)
-{
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn;
-
-   insn = next_insn(p, BRW_OPCODE_IF);
-
-   /* Override the defaults for this instruction:
-    */
-   if (intel->gen < 6) {
-      brw_set_dest(p, insn, brw_ip_reg());
-      brw_set_src0(p, insn, brw_ip_reg());
-      brw_set_src1(p, insn, brw_imm_d(0x0));
-   } else if (intel->gen == 6) {
-      brw_set_dest(p, insn, brw_imm_w(0));
-      insn->bits1.branch_gen6.jump_count = 0;
-      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
-      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
-   } else {
-      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
-      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
-      brw_set_src1(p, insn, brw_imm_ud(0));
-      insn->bits3.break_cont.jip = 0;
-      insn->bits3.break_cont.uip = 0;
-   }
-
-   insn->header.execution_size = execute_size;
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
-   insn->header.mask_control = BRW_MASK_ENABLE;
-   if (!p->single_program_flow)
-      insn->header.thread_control = BRW_THREAD_SWITCH;
-
-   p->current->header.predicate_control = BRW_PREDICATE_NONE;
-
-   push_if_stack(p, insn);
-   p->if_depth_in_loop[p->loop_stack_depth]++;
-   return insn;
-}
-
-/* This function is only used for gen6-style IF instructions with an
- * embedded comparison (conditional modifier).  It is not used on gen7.
- */
-struct brw_instruction *
-gen6_IF(struct brw_compile *p, uint32_t conditional,
-       struct brw_reg src0, struct brw_reg src1)
-{
-   struct brw_instruction *insn;
-
-   insn = next_insn(p, BRW_OPCODE_IF);
-
-   brw_set_dest(p, insn, brw_imm_w(0));
-   if (p->compressed) {
-      insn->header.execution_size = BRW_EXECUTE_16;
-   } else {
-      insn->header.execution_size = BRW_EXECUTE_8;
-   }
-   insn->bits1.branch_gen6.jump_count = 0;
-   brw_set_src0(p, insn, src0);
-   brw_set_src1(p, insn, src1);
-
-   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
-   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
-   insn->header.destreg__conditionalmod = conditional;
-
-   if (!p->single_program_flow)
-      insn->header.thread_control = BRW_THREAD_SWITCH;
-
-   push_if_stack(p, insn);
-   return insn;
-}
-
-/**
- * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
- */
-static void
-convert_IF_ELSE_to_ADD(struct brw_compile *p,
-                      struct brw_instruction *if_inst,
-                      struct brw_instruction *else_inst)
-{
-   /* The next instruction (where the ENDIF would be, if it existed) */
-   struct brw_instruction *next_inst = &p->store[p->nr_insn];
-
-   assert(p->single_program_flow);
-   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
-   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
-   assert(if_inst->header.execution_size == BRW_EXECUTE_1);
-
-   /* Convert IF to an ADD instruction that moves the instruction pointer
-    * to the first instruction of the ELSE block.  If there is no ELSE
-    * block, point to where ENDIF would be.  Reverse the predicate.
-    *
-    * There's no need to execute an ENDIF since we don't need to do any
-    * stack operations, and if we're currently executing, we just want to
-    * continue normally.
-    */
-   if_inst->header.opcode = BRW_OPCODE_ADD;
-   if_inst->header.predicate_inverse = 1;
-
-   if (else_inst != NULL) {
-      /* Convert ELSE to an ADD instruction that points where the ENDIF
-       * would be.
-       */
-      else_inst->header.opcode = BRW_OPCODE_ADD;
-
-      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
-      else_inst->bits3.ud = (next_inst - else_inst) * 16;
-   } else {
-      if_inst->bits3.ud = (next_inst - if_inst) * 16;
-   }
-}
-
-/**
- * Patch IF and ELSE instructions with appropriate jump targets.
- */
-static void
-patch_IF_ELSE(struct brw_compile *p,
-             struct brw_instruction *if_inst,
-             struct brw_instruction *else_inst,
-             struct brw_instruction *endif_inst)
-{
-   struct intel_context *intel = &p->brw->intel;
-
-   /* We shouldn't be patching IF and ELSE instructions in single program flow
-    * mode when gen < 6, because in single program flow mode on those
-    * platforms, we convert flow control instructions to conditional ADDs that
-    * operate on IP (see brw_ENDIF).
-    *
-    * However, on Gen6, writing to IP doesn't work in single program flow mode
-    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
-    * not be updated by non-flow control instructions.").  And on later
-    * platforms, there is no significant benefit to converting control flow
-    * instructions to conditional ADDs.  So we do patch IF and ELSE
-    * instructions in single program flow mode on those platforms.
-    */
-   if (intel->gen < 6)
-      assert(!p->single_program_flow);
-
-   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
-   assert(endif_inst != NULL);
-   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
-
-   unsigned br = 1;
-   /* Jump count is for 64bit data chunk each, so one 128bit instruction
-    * requires 2 chunks.
-    */
-   if (intel->gen >= 5)
-      br = 2;
-
-   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
-   endif_inst->header.execution_size = if_inst->header.execution_size;
-
-   if (else_inst == NULL) {
-      /* Patch IF -> ENDIF */
-      if (intel->gen < 6) {
-        /* Turn it into an IFF, which means no mask stack operations for
-         * all-false and jumping past the ENDIF.
-         */
-        if_inst->header.opcode = BRW_OPCODE_IFF;
-        if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
-        if_inst->bits3.if_else.pop_count = 0;
-        if_inst->bits3.if_else.pad0 = 0;
-      } else if (intel->gen == 6) {
-        /* As of gen6, there is no IFF and IF must point to the ENDIF. */
-        if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
-      } else {
-        if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
-        if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
-      }
-   } else {
-      else_inst->header.execution_size = if_inst->header.execution_size;
-
-      /* Patch IF -> ELSE */
-      if (intel->gen < 6) {
-        if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
-        if_inst->bits3.if_else.pop_count = 0;
-        if_inst->bits3.if_else.pad0 = 0;
-      } else if (intel->gen == 6) {
-        if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
-      }
-
-      /* Patch ELSE -> ENDIF */
-      if (intel->gen < 6) {
-        /* BRW_OPCODE_ELSE pre-gen6 should point just past the
-         * matching ENDIF.
-         */
-        else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
-        else_inst->bits3.if_else.pop_count = 1;
-        else_inst->bits3.if_else.pad0 = 0;
-      } else if (intel->gen == 6) {
-        /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
-        else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
-      } else {
-        /* The IF instruction's JIP should point just past the ELSE */
-        if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
-        /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
-        if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
-        else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
-      }
-   }
-}
-
-void
-brw_ELSE(struct brw_compile *p)
-{
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn;
-
-   insn = next_insn(p, BRW_OPCODE_ELSE);
-
-   if (intel->gen < 6) {
-      brw_set_dest(p, insn, brw_ip_reg());
-      brw_set_src0(p, insn, brw_ip_reg());
-      brw_set_src1(p, insn, brw_imm_d(0x0));
-   } else if (intel->gen == 6) {
-      brw_set_dest(p, insn, brw_imm_w(0));
-      insn->bits1.branch_gen6.jump_count = 0;
-      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   } else {
-      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src1(p, insn, brw_imm_ud(0));
-      insn->bits3.break_cont.jip = 0;
-      insn->bits3.break_cont.uip = 0;
-   }
-
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   insn->header.mask_control = BRW_MASK_ENABLE;
-   if (!p->single_program_flow)
-      insn->header.thread_control = BRW_THREAD_SWITCH;
-
-   push_if_stack(p, insn);
-}
-
-void
-brw_ENDIF(struct brw_compile *p)
-{
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn = NULL;
-   struct brw_instruction *else_inst = NULL;
-   struct brw_instruction *if_inst = NULL;
-   struct brw_instruction *tmp;
-   bool emit_endif = true;
-
-   /* In single program flow mode, we can express IF and ELSE instructions
-    * equivalently as ADD instructions that operate on IP.  On platforms prior
-    * to Gen6, flow control instructions cause an implied thread switch, so
-    * this is a significant savings.
-    *
-    * However, on Gen6, writing to IP doesn't work in single program flow mode
-    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
-    * not be updated by non-flow control instructions.").  And on later
-    * platforms, there is no significant benefit to converting control flow
-    * instructions to conditional ADDs.  So we only do this trick on Gen4 and
-    * Gen5.
-    */
-   if (intel->gen < 6 && p->single_program_flow)
-      emit_endif = false;
-
-   /*
-    * A single next_insn() may change the base adress of instruction store
-    * memory(p->store), so call it first before referencing the instruction
-    * store pointer from an index
-    */
-   if (emit_endif)
-      insn = next_insn(p, BRW_OPCODE_ENDIF);
-
-   /* Pop the IF and (optional) ELSE instructions from the stack */
-   p->if_depth_in_loop[p->loop_stack_depth]--;
-   tmp = pop_if_stack(p);
-   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
-      else_inst = tmp;
-      tmp = pop_if_stack(p);
-   }
-   if_inst = tmp;
-
-   if (!emit_endif) {
-      /* ENDIF is useless; don't bother emitting it. */
-      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
-      return;
-   }
-
-   if (intel->gen < 6) {
-      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
-      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
-      brw_set_src1(p, insn, brw_imm_d(0x0));
-   } else if (intel->gen == 6) {
-      brw_set_dest(p, insn, brw_imm_w(0));
-      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   } else {
-      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src1(p, insn, brw_imm_ud(0));
-   }
-
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   insn->header.mask_control = BRW_MASK_ENABLE;
-   insn->header.thread_control = BRW_THREAD_SWITCH;
-
-   /* Also pop item off the stack in the endif instruction: */
-   if (intel->gen < 6) {
-      insn->bits3.if_else.jump_count = 0;
-      insn->bits3.if_else.pop_count = 1;
-      insn->bits3.if_else.pad0 = 0;
-   } else if (intel->gen == 6) {
-      insn->bits1.branch_gen6.jump_count = 2;
-   } else {
-      insn->bits3.break_cont.jip = 2;
-   }
-   patch_IF_ELSE(p, if_inst, else_inst, insn);
-}
-
-struct brw_instruction *brw_BREAK(struct brw_compile *p)
-{
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn;
-
-   insn = next_insn(p, BRW_OPCODE_BREAK);
-   if (intel->gen >= 6) {
-      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src1(p, insn, brw_imm_d(0x0));
-   } else {
-      brw_set_dest(p, insn, brw_ip_reg());
-      brw_set_src0(p, insn, brw_ip_reg());
-      brw_set_src1(p, insn, brw_imm_d(0x0));
-      insn->bits3.if_else.pad0 = 0;
-      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
-   }
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   insn->header.execution_size = BRW_EXECUTE_8;
-
-   return insn;
-}
-
-struct brw_instruction *gen6_CONT(struct brw_compile *p)
-{
-   struct brw_instruction *insn;
-
-   insn = next_insn(p, BRW_OPCODE_CONTINUE);
-   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   brw_set_dest(p, insn, brw_ip_reg());
-   brw_set_src0(p, insn, brw_ip_reg());
-   brw_set_src1(p, insn, brw_imm_d(0x0));
-
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   insn->header.execution_size = BRW_EXECUTE_8;
-   return insn;
-}
-
-struct brw_instruction *brw_CONT(struct brw_compile *p)
-{
-   struct brw_instruction *insn;
-   insn = next_insn(p, BRW_OPCODE_CONTINUE);
-   brw_set_dest(p, insn, brw_ip_reg());
-   brw_set_src0(p, insn, brw_ip_reg());
-   brw_set_src1(p, insn, brw_imm_d(0x0));
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   insn->header.execution_size = BRW_EXECUTE_8;
-   /* insn->header.mask_control = BRW_MASK_DISABLE; */
-   insn->bits3.if_else.pad0 = 0;
-   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
-   return insn;
-}
-
-struct brw_instruction *gen6_HALT(struct brw_compile *p)
-{
-   struct brw_instruction *insn;
-
-   insn = next_insn(p, BRW_OPCODE_HALT);
-   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
-
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   insn->header.execution_size = BRW_EXECUTE_8;
-   return insn;
-}
-
-/* DO/WHILE loop:
- *
- * The DO/WHILE is just an unterminated loop -- break or continue are
- * used for control within the loop.  We have a few ways they can be
- * done.
- *
- * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
- * jip and no DO instruction.
- *
- * For non-uniform control flow pre-gen6, there's a DO instruction to
- * push the mask, and a WHILE to jump back, and BREAK to get out and
- * pop the mask.
- *
- * For gen6, there's no more mask stack, so no need for DO.  WHILE
- * just points back to the first instruction of the loop.
- */
-struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
-{
-   struct intel_context *intel = &p->brw->intel;
-
-   if (intel->gen >= 6 || p->single_program_flow) {
-      push_loop_stack(p, &p->store[p->nr_insn]);
-      return &p->store[p->nr_insn];
-   } else {
-      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
-
-      push_loop_stack(p, insn);
-
-      /* Override the defaults for this instruction:
-       */
-      brw_set_dest(p, insn, brw_null_reg());
-      brw_set_src0(p, insn, brw_null_reg());
-      brw_set_src1(p, insn, brw_null_reg());
-
-      insn->header.compression_control = BRW_COMPRESSION_NONE;
-      insn->header.execution_size = execute_size;
-      insn->header.predicate_control = BRW_PREDICATE_NONE;
-      /* insn->header.mask_control = BRW_MASK_ENABLE; */
-      /* insn->header.mask_control = BRW_MASK_DISABLE; */
-
-      return insn;
-   }
-}
-
-/**
- * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
- * instruction here.
- *
- * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
- * nesting, since it can always just point to the end of the block/current loop.
- */
-static void
-brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
-{
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *do_inst = get_inner_do_insn(p);
-   struct brw_instruction *inst;
-   int br = (intel->gen == 5) ? 2 : 1;
-
-   for (inst = while_inst - 1; inst != do_inst; inst--) {
-      /* If the jump count is != 0, that means that this instruction has already
-       * been patched because it's part of a loop inside of the one we're
-       * patching.
-       */
-      if (inst->header.opcode == BRW_OPCODE_BREAK &&
-         inst->bits3.if_else.jump_count == 0) {
-        inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
-      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
-                inst->bits3.if_else.jump_count == 0) {
-        inst->bits3.if_else.jump_count = br * (while_inst - inst);
-      }
-   }
-}
-
-struct brw_instruction *brw_WHILE(struct brw_compile *p)
-{
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn, *do_insn;
-   GLuint br = 1;
-
-   if (intel->gen >= 5)
-      br = 2;
-
-   if (intel->gen >= 7) {
-      insn = next_insn(p, BRW_OPCODE_WHILE);
-      do_insn = get_inner_do_insn(p);
-
-      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src1(p, insn, brw_imm_ud(0));
-      insn->bits3.break_cont.jip = br * (do_insn - insn);
-
-      insn->header.execution_size = BRW_EXECUTE_8;
-   } else if (intel->gen == 6) {
-      insn = next_insn(p, BRW_OPCODE_WHILE);
-      do_insn = get_inner_do_insn(p);
-
-      brw_set_dest(p, insn, brw_imm_w(0));
-      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
-      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-
-      insn->header.execution_size = BRW_EXECUTE_8;
-   } else {
-      if (p->single_program_flow) {
-        insn = next_insn(p, BRW_OPCODE_ADD);
-         do_insn = get_inner_do_insn(p);
-
-        brw_set_dest(p, insn, brw_ip_reg());
-        brw_set_src0(p, insn, brw_ip_reg());
-        brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
-        insn->header.execution_size = BRW_EXECUTE_1;
-      } else {
-        insn = next_insn(p, BRW_OPCODE_WHILE);
-         do_insn = get_inner_do_insn(p);
-
-        assert(do_insn->header.opcode == BRW_OPCODE_DO);
-
-        brw_set_dest(p, insn, brw_ip_reg());
-        brw_set_src0(p, insn, brw_ip_reg());
-        brw_set_src1(p, insn, brw_imm_d(0));
-
-        insn->header.execution_size = do_insn->header.execution_size;
-        insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
-        insn->bits3.if_else.pop_count = 0;
-        insn->bits3.if_else.pad0 = 0;
-
-        brw_patch_break_cont(p, insn);
-      }
-   }
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
-   p->current->header.predicate_control = BRW_PREDICATE_NONE;
-
-   p->loop_stack_depth--;
-
-   return insn;
-}
-
-
-/* FORWARD JUMPS:
- */
-void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
-{
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
-   GLuint jmpi = 1;
-
-   if (intel->gen >= 5)
-      jmpi = 2;
-
-   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
-   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
-
-   jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
-}
-
-
-
 /* To integrate with the above, it makes sense that the comparison
  * instruction should populate the flag register.  It might be simpler
  * just to use the flag reg for most WM tasks?
  */
 void brw_CMP(struct brw_compile *p,
-            struct brw_reg dest,
-            GLuint conditional,
-            struct brw_reg src0,
-            struct brw_reg src1)
+             struct brw_reg dest,
+             uint32_t conditional,
+             struct brw_reg src0,
+             struct brw_reg src1)
 {
    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
 
@@ -1637,41 +880,38 @@ void brw_WAIT (struct brw_compile *p)
  * Helpers for the various SEND message types:
  */
 
-/** Extended math function, float[8].
- */
-void brw_math( struct brw_compile *p,
-              struct brw_reg dest,
-              GLuint function,
-              GLuint saturate,
-              GLuint msg_reg_nr,
-              struct brw_reg src,
-              GLuint data_type,
-              GLuint precision )
+/** Extended math function, float[8].  */
+void brw_math(struct brw_compile *p,
+              struct brw_reg dest,
+              uint32_t function,
+              uint32_t saturate,
+              uint32_t msg_reg_nr,
+              struct brw_reg src,
+              uint32_t data_type,
+              uint32_t precision)
 {
-   struct intel_context *intel = &p->brw->intel;
-
-   if (intel->gen >= 6) {
+   if (p->gen >= 6) {
       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
 
       assert(dest.file == BRW_GENERAL_REGISTER_FILE);
       assert(src.file == BRW_GENERAL_REGISTER_FILE);
 
       assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
-      if (intel->gen == 6)
-        assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
+      if (p->gen == 6)
+         assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
 
       /* Source modifiers are ignored for extended math instructions on Gen6. */
-      if (intel->gen == 6) {
-        assert(!src.negate);
-        assert(!src.abs);
+      if (p->gen == 6) {
+         assert(!src.negate);
+         assert(!src.abs);
       }
 
       if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
-         function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
-         function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
-        assert(src.type != BRW_REGISTER_TYPE_F);
+          function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
+          function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
+         assert(src.type != BRW_REGISTER_TYPE_F);
       } else {
-        assert(src.type == BRW_REGISTER_TYPE_F);
+         assert(src.type == BRW_REGISTER_TYPE_F);
       }
 
       /* Math is the same ISA format as other opcodes, except that CondModifier
@@ -1695,28 +935,26 @@ void brw_math( struct brw_compile *p,
       brw_set_dest(p, insn, dest);
       brw_set_src0(p, insn, src);
       brw_set_math_message(p,
-                          insn,
-                          function,
-                          src.type == BRW_REGISTER_TYPE_D,
-                          precision,
-                          saturate,
-                          data_type);
+                           insn,
+                           function,
+                           src.type == BRW_REGISTER_TYPE_D,
+                           precision,
+                           saturate,
+                           data_type);
    }
 }
 
 /** Extended math function, float[8].
  */
 void brw_math2(struct brw_compile *p,
-              struct brw_reg dest,
-              GLuint function,
-              struct brw_reg src0,
-              struct brw_reg src1)
+               struct brw_reg dest,
+               uint32_t function,
+               struct brw_reg src0,
+               struct brw_reg src1)
 {
-   struct intel_context *intel = &p->brw->intel;
    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
 
-   assert(intel->gen >= 6);
-   (void) intel;
+   assert(p->gen >= 6);
 
 
    assert(dest.file == BRW_GENERAL_REGISTER_FILE);
@@ -1724,7 +962,7 @@ void brw_math2(struct brw_compile *p,
    assert(src1.file == BRW_GENERAL_REGISTER_FILE);
 
    assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
-   if (intel->gen == 6) {
+   if (p->gen == 6) {
       assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
       assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
    }
@@ -1740,7 +978,7 @@ void brw_math2(struct brw_compile *p,
    }
 
    /* Source modifiers are ignored for extended math instructions on Gen6. */
-   if (intel->gen == 6) {
+   if (p->gen == 6) {
       assert(!src0.negate);
       assert(!src0.abs);
       assert(!src1.negate);
@@ -1761,18 +999,17 @@ void brw_math2(struct brw_compile *p,
  * Extended math function, float[16].
  * Use 2 send instructions.
  */
-void brw_math_16( struct brw_compile *p,
-                 struct brw_reg dest,
-                 GLuint function,
-                 GLuint saturate,
-                 GLuint msg_reg_nr,
-                 struct brw_reg src,
-                 GLuint precision )
+void brw_math_16(struct brw_compile *p,
+                 struct brw_reg dest,
+                 uint32_t function,
+                 uint32_t saturate,
+                 uint32_t msg_reg_nr,
+                 struct brw_reg src,
+                 uint32_t precision)
 {
-   struct intel_context *intel = &p->brw->intel;
    struct brw_instruction *insn;
 
-   if (intel->gen >= 6) {
+   if (p->gen >= 6) {
       insn = next_insn(p, BRW_OPCODE_MATH);
 
       /* Math is the same ISA format as other opcodes, except that CondModifier
@@ -1803,12 +1040,12 @@ void brw_math_16( struct brw_compile *p,
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src);
    brw_set_math_message(p,
-                       insn, 
-                       function,
-                       BRW_MATH_INTEGER_UNSIGNED,
-                       precision,
-                       saturate,
-                       BRW_MATH_DATA_VECTOR);
+                        insn, 
+                        function,
+                        BRW_MATH_INTEGER_UNSIGNED,
+                        precision,
+                        saturate,
+                        BRW_MATH_DATA_VECTOR);
 
    /* Second instruction:
     */
@@ -1819,12 +1056,12 @@ void brw_math_16( struct brw_compile *p,
    brw_set_dest(p, insn, offset(dest,1));
    brw_set_src0(p, insn, src);
    brw_set_math_message(p, 
-                       insn, 
-                       function,
-                       BRW_MATH_INTEGER_UNSIGNED,
-                       precision,
-                       saturate,
-                       BRW_MATH_DATA_VECTOR);
+                        insn, 
+                        function,
+                        BRW_MATH_INTEGER_UNSIGNED,
+                        precision,
+                        saturate,
+                        BRW_MATH_DATA_VECTOR);
 
    brw_pop_insn_state(p);
 }
@@ -1838,15 +1075,14 @@ void brw_math_16( struct brw_compile *p,
  * register spilling.
  */
 void brw_oword_block_write_scratch(struct brw_compile *p,
-                                  struct brw_reg mrf,
-                                  int num_regs,
-                                  GLuint offset)
+                                   struct brw_reg mrf,
+                                   int num_regs,
+                                   uint32_t offset)
 {
-   struct intel_context *intel = &p->brw->intel;
    uint32_t msg_control, msg_type;
    int mlen;
 
-   if (intel->gen >= 6)
+   if (p->gen >= 6)
       offset /= 16;
 
    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
@@ -1873,10 +1109,10 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
 
       /* set message header global offset field (reg 0, element 2) */
       brw_MOV(p,
-             retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
-                                 mrf.nr,
-                                 2), BRW_REGISTER_TYPE_UD),
-             brw_imm_ud(offset));
+              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+                                  mrf.nr,
+                                  2), BRW_REGISTER_TYPE_UD),
+              brw_imm_ud(offset));
 
       brw_pop_insn_state(p);
    }
@@ -1886,11 +1122,11 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
       int send_commit_msg;
       struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
-                                        BRW_REGISTER_TYPE_UW);
+                                         BRW_REGISTER_TYPE_UW);
 
       if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
-        insn->header.compression_control = BRW_COMPRESSION_NONE;
-        src_header = vec16(src_header);
+         insn->header.compression_control = BRW_COMPRESSION_NONE;
+         src_header = vec16(src_header);
       }
       assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
       insn->header.destreg__conditionalmod = mrf.nr;
@@ -1905,37 +1141,37 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
        * protection.  Our use of DP writes is all about register
        * spilling within a thread.
        */
-      if (intel->gen >= 6) {
-        dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
-        send_commit_msg = 0;
+      if (p->gen >= 6) {
+         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+         send_commit_msg = 0;
       } else {
-        dest = src_header;
-        send_commit_msg = 1;
+         dest = src_header;
+         send_commit_msg = 1;
       }
 
       brw_set_dest(p, insn, dest);
-      if (intel->gen >= 6) {
-        brw_set_src0(p, insn, mrf);
+      if (p->gen >= 6) {
+         brw_set_src0(p, insn, mrf);
       } else {
-        brw_set_src0(p, insn, brw_null_reg());
+         brw_set_src0(p, insn, brw_null_reg());
       }
 
-      if (intel->gen >= 6)
-        msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
+      if (p->gen >= 6)
+         msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
       else
-        msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
+         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
 
       brw_set_dp_write_message(p,
-                              insn,
-                              255, /* binding table index (255=stateless) */
-                              msg_control,
-                              msg_type,
-                              mlen,
-                              true, /* header_present */
-                              0, /* not a render target */
-                              send_commit_msg, /* response_length */
-                              0, /* eot */
-                              send_commit_msg);
+                               insn,
+                               255, /* binding table index (255=stateless) */
+                               msg_control,
+                               msg_type,
+                               mlen,
+                               true, /* header_present */
+                               0, /* not a render target */
+                               send_commit_msg, /* response_length */
+                               0, /* eot */
+                               send_commit_msg);
    }
 }
 
@@ -1949,16 +1185,15 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
  */
 void
 brw_oword_block_read_scratch(struct brw_compile *p,
-                            struct brw_reg dest,
-                            struct brw_reg mrf,
-                            int num_regs,
-                            GLuint offset)
+                             struct brw_reg dest,
+                             struct brw_reg mrf,
+                             int num_regs,
+                             uint32_t offset)
 {
-   struct intel_context *intel = &p->brw->intel;
    uint32_t msg_control;
    int rlen;
 
-   if (intel->gen >= 6)
+   if (p->gen >= 6)
       offset /= 16;
 
    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
@@ -1981,10 +1216,10 @@ brw_oword_block_read_scratch(struct brw_compile *p,
 
       /* set message header global offset field (reg 0, element 2) */
       brw_MOV(p,
-             retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
-                                 mrf.nr,
-                                 2), BRW_REGISTER_TYPE_UD),
-             brw_imm_ud(offset));
+              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+                                  mrf.nr,
+                                  2), BRW_REGISTER_TYPE_UD),
+              brw_imm_ud(offset));
 
       brw_pop_insn_state(p);
    }
@@ -1996,21 +1231,21 @@ brw_oword_block_read_scratch(struct brw_compile *p,
       insn->header.compression_control = BRW_COMPRESSION_NONE;
       insn->header.destreg__conditionalmod = mrf.nr;
 
-      brw_set_dest(p, insn, dest);     /* UW? */
-      if (intel->gen >= 6) {
-        brw_set_src0(p, insn, mrf);
+      brw_set_dest(p, insn, dest);        /* UW? */
+      if (p->gen >= 6) {
+         brw_set_src0(p, insn, mrf);
       } else {
-        brw_set_src0(p, insn, brw_null_reg());
+         brw_set_src0(p, insn, brw_null_reg());
       }
 
       brw_set_dp_read_message(p,
-                             insn,
-                             255, /* binding table index (255=stateless) */
-                             msg_control,
-                             BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
-                             BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
-                             1, /* msg_length */
-                             rlen);
+                              insn,
+                              255, /* binding table index (255=stateless) */
+                              msg_control,
+                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+                              BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
+                              1, /* msg_length */
+                              rlen);
    }
 }
 
@@ -2020,15 +1255,14 @@ brw_oword_block_read_scratch(struct brw_compile *p,
  * Used for fetching shader constants.
  */
 void brw_oword_block_read(struct brw_compile *p,
-                         struct brw_reg dest,
-                         struct brw_reg mrf,
-                         uint32_t offset,
-                         uint32_t bind_table_index)
+                          struct brw_reg dest,
+                          struct brw_reg mrf,
+                          uint32_t offset,
+                          uint32_t bind_table_index)
 {
-   struct intel_context *intel = &p->brw->intel;
 
    /* On newer hardware, offset is in units of owords. */
-   if (intel->gen >= 6)
+   if (p->gen >= 6)
       offset /= 16;
 
    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
@@ -2042,10 +1276,10 @@ void brw_oword_block_read(struct brw_compile *p,
 
    /* set message header global offset field (reg 0, element 2) */
    brw_MOV(p,
-          retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
-                              mrf.nr,
-                              2), BRW_REGISTER_TYPE_UD),
-          brw_imm_ud(offset));
+           retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+                               mrf.nr,
+                               2), BRW_REGISTER_TYPE_UD),
+           brw_imm_ud(offset));
 
    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
    insn->header.destreg__conditionalmod = mrf.nr;
@@ -2054,20 +1288,20 @@ void brw_oword_block_read(struct brw_compile *p,
    dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
 
    brw_set_dest(p, insn, dest);
-   if (intel->gen >= 6) {
+   if (p->gen >= 6) {
       brw_set_src0(p, insn, mrf);
    } else {
       brw_set_src0(p, insn, brw_null_reg());
    }
 
    brw_set_dp_read_message(p,
-                          insn,
-                          bind_table_index,
-                          BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
-                          BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
-                          BRW_DATAPORT_READ_TARGET_DATA_CACHE,
-                          1, /* msg_length */
-                          1); /* response_length (1 reg, 2 owords!) */
+                           insn,
+                           bind_table_index,
+                           BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
+                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+                           1, /* msg_length */
+                           1); /* response_length (1 reg, 2 owords!) */
 
    brw_pop_insn_state(p);
 }
@@ -2079,9 +1313,9 @@ void brw_oword_block_read(struct brw_compile *p,
  * the provided mrf header reg.
  */
 void brw_dword_scattered_read(struct brw_compile *p,
-                             struct brw_reg dest,
-                             struct brw_reg mrf,
-                             uint32_t bind_table_index)
+                              struct brw_reg dest,
+                              struct brw_reg mrf,
+                              uint32_t bind_table_index)
 {
    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
 
@@ -2102,17 +1336,15 @@ void brw_dword_scattered_read(struct brw_compile *p,
    brw_set_src0(p, insn, brw_null_reg());
 
    brw_set_dp_read_message(p,
-                          insn,
-                          bind_table_index,
-                          BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
-                          BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
-                          BRW_DATAPORT_READ_TARGET_DATA_CACHE,
-                          2, /* msg_length */
-                          1); /* response_length */
+                           insn,
+                           bind_table_index,
+                           BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
+                           BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
+                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+                           2, /* msg_length */
+                           1); /* response_length */
 }
 
-
-
 /**
  * Read float[4] constant(s) from VS constant buffer.
  * For relative addressing, two float[4] constants will be read into 'dest'.
@@ -2120,14 +1352,13 @@ void brw_dword_scattered_read(struct brw_compile *p,
  */
 void brw_dp_READ_4_vs(struct brw_compile *p,
                       struct brw_reg dest,
-                      GLuint location,
-                      GLuint bind_table_index)
+                      uint32_t location,
+                      uint32_t bind_table_index)
 {
-   struct intel_context *intel = &p->brw->intel;
    struct brw_instruction *insn;
-   GLuint msg_reg_nr = 1;
+   uint32_t msg_reg_nr = 1;
 
-   if (intel->gen >= 6)
+   if (p->gen >= 6)
       location /= 16;
 
    /* Setup MRF[1] with location/offset into const buffer */
@@ -2137,8 +1368,8 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
-                    BRW_REGISTER_TYPE_UD),
-          brw_imm_ud(location));
+                     BRW_REGISTER_TYPE_UD),
+           brw_imm_ud(location));
    brw_pop_insn_state(p);
 
    insn = next_insn(p, BRW_OPCODE_SEND);
@@ -2149,20 +1380,20 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
    insn->header.mask_control = BRW_MASK_DISABLE;
 
    brw_set_dest(p, insn, dest);
-   if (intel->gen >= 6) {
+   if (p->gen >= 6) {
       brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
    } else {
       brw_set_src0(p, insn, brw_null_reg());
    }
 
    brw_set_dp_read_message(p,
-                          insn,
-                          bind_table_index,
-                          0,
-                          BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
-                          BRW_DATAPORT_READ_TARGET_DATA_CACHE,
-                          1, /* msg_length */
-                          1); /* response_length (1 Oword) */
+                           insn,
+                           bind_table_index,
+                           0,
+                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+                           1, /* msg_length */
+                           1); /* response_length (1 Oword) */
 }
 
 /**
@@ -2170,12 +1401,11 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
  * relative addressing.
  */
 void brw_dp_READ_4_vs_relative(struct brw_compile *p,
-                              struct brw_reg dest,
-                              struct brw_reg addr_reg,
-                              GLuint offset,
-                              GLuint bind_table_index)
+                               struct brw_reg dest,
+                               struct brw_reg addr_reg,
+                               uint32_t offset,
+                               uint32_t bind_table_index)
 {
-   struct intel_context *intel = &p->brw->intel;
    struct brw_reg src = brw_vec8_grf(0, 0);
    int msg_type;
 
@@ -2190,7 +1420,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
     * fields ignored.
     */
    brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
-          addr_reg, brw_imm_d(offset));
+           addr_reg, brw_imm_d(offset));
    brw_pop_insn_state(p);
 
    gen6_resolve_implied_move(p, &src, 0);
@@ -2204,38 +1434,30 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src);
 
-   if (intel->gen >= 6)
-      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
-   else if (intel->gen == 5 || intel->is_g4x)
-      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
-   else
-      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+   msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
 
    brw_set_dp_read_message(p,
-                          insn,
-                          bind_table_index,
-                          BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
-                          msg_type,
-                          BRW_DATAPORT_READ_TARGET_DATA_CACHE,
-                          2, /* msg_length */
-                          1); /* response_length */
+                           insn,
+                           bind_table_index,
+                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+                           msg_type,
+                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+                           2, /* msg_length */
+                           1); /* response_length */
 }
 
-
-
 void brw_fb_WRITE(struct brw_compile *p,
-                 int dispatch_width,
-                  GLuint msg_reg_nr,
+                  int dispatch_width,
+                  uint32_t msg_reg_nr,
                   struct brw_reg src0,
-                  GLuint binding_table_index,
-                  GLuint msg_length,
-                  GLuint response_length,
+                  uint32_t binding_table_index,
+                  uint32_t msg_length,
+                  uint32_t response_length,
                   bool eot,
                   bool header_present)
 {
-   struct intel_context *intel = &p->brw->intel;
    struct brw_instruction *insn;
-   GLuint msg_control, msg_type;
+   uint32_t msg_control, msg_type;
    struct brw_reg dest;
 
    if (dispatch_width == 16)
@@ -2243,7 +1465,7 @@ void brw_fb_WRITE(struct brw_compile *p,
    else
       dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
 
-   if (intel->gen >= 6 && binding_table_index == 0) {
+   if (p->gen >= 6 && binding_table_index == 0) {
       insn = next_insn(p, BRW_OPCODE_SENDC);
    } else {
       insn = next_insn(p, BRW_OPCODE_SEND);
@@ -2252,7 +1474,7 @@ void brw_fb_WRITE(struct brw_compile *p,
    insn->header.predicate_control = 0;
    insn->header.compression_control = BRW_COMPRESSION_NONE;
 
-   if (intel->gen >= 6) {
+   if (p->gen >= 6) {
       /* headerless version, just submit color payload */
       src0 = brw_message_reg(msg_reg_nr);
 
@@ -2271,16 +1493,16 @@ void brw_fb_WRITE(struct brw_compile *p,
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src0);
    brw_set_dp_write_message(p,
-                           insn,
-                           binding_table_index,
-                           msg_control,
-                           msg_type,
-                           msg_length,
-                           header_present,
-                           eot, /* last render target write */
-                           response_length,
-                           eot,
-                           0 /* send_commit_msg */);
+                            insn,
+                            binding_table_index,
+                            msg_control,
+                            msg_type,
+                            msg_length,
+                            header_present,
+                            eot, /* last render target write */
+                            response_length,
+                            eot,
+                            0 /* send_commit_msg */);
 }
 
 
@@ -2290,20 +1512,19 @@ void brw_fb_WRITE(struct brw_compile *p,
  * of sampling operation is performed.  See volume 4, page 161 of docs.
  */
 void brw_SAMPLE(struct brw_compile *p,
-               struct brw_reg dest,
-               GLuint msg_reg_nr,
-               struct brw_reg src0,
-               GLuint binding_table_index,
-               GLuint sampler,
-               GLuint writemask,
-               GLuint msg_type,
-               GLuint response_length,
-               GLuint msg_length,
-               GLuint header_present,
-               GLuint simd_mode,
-               GLuint return_format)
+                struct brw_reg dest,
+                uint32_t msg_reg_nr,
+                struct brw_reg src0,
+                uint32_t binding_table_index,
+                uint32_t sampler,
+                uint32_t writemask,
+                uint32_t msg_type,
+                uint32_t response_length,
+                uint32_t msg_length,
+                uint32_t header_present,
+                uint32_t simd_mode,
+                uint32_t return_format)
 {
-   struct intel_context *intel = &p->brw->intel;
    bool need_stall = 0;
 
    if (writemask == 0) {
@@ -2322,56 +1543,56 @@ void brw_SAMPLE(struct brw_compile *p,
     * needed.
     */
    if (writemask != WRITEMASK_XYZW) {
-      GLuint dst_offset = 0;
-      GLuint i, newmask = 0, len = 0;
+      uint32_t dst_offset = 0;
+      uint32_t i, newmask = 0, len = 0;
 
       for (i = 0; i < 4; i++) {
-        if (writemask & (1<<i))
-           break;
-        dst_offset += 2;
+         if (writemask & (1<<i))
+            break;
+         dst_offset += 2;
       }
       for (; i < 4; i++) {
-        if (!(writemask & (1<<i)))
-           break;
-        newmask |= 1<<i;
-        len++;
+         if (!(writemask & (1<<i)))
+            break;
+         newmask |= 1<<i;
+         len++;
       }
 
       if (newmask != writemask) {
-        need_stall = 1;
+         need_stall = 1;
          /* printf("need stall %x %x\n", newmask , writemask); */
       }
       else {
-        bool dispatch_16 = false;
+         bool dispatch_16 = false;
 
-        struct brw_reg m1 = brw_message_reg(msg_reg_nr);
+         struct brw_reg m1 = brw_message_reg(msg_reg_nr);
 
-        guess_execution_size(p, p->current, dest);
-        if (p->current->header.execution_size == BRW_EXECUTE_16)
-           dispatch_16 = true;
+         guess_execution_size(p, p->current, dest);
+         if (p->current->header.execution_size == BRW_EXECUTE_16)
+            dispatch_16 = true;
 
-        newmask = ~newmask & WRITEMASK_XYZW;
+         newmask = ~newmask & WRITEMASK_XYZW;
 
-        brw_push_insn_state(p);
+         brw_push_insn_state(p);
 
-        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-        brw_set_mask_control(p, BRW_MASK_DISABLE);
+         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+         brw_set_mask_control(p, BRW_MASK_DISABLE);
 
-        brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
-                retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
-        brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); 
+         brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
+                 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
+           brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); 
 
-        brw_pop_insn_state(p);
+         brw_pop_insn_state(p);
 
-        src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 
-        dest = offset(dest, dst_offset);
+           src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 
+         dest = offset(dest, dst_offset);
 
-        /* For 16-wide dispatch, masked channels are skipped in the
-         * response.  For 8-wide, masked channels still take up slots,
-         * and are just not written to.
-         */
-        if (dispatch_16)
-           response_length = len * 2;
+         /* For 16-wide dispatch, masked channels are skipped in the
+          * response.  For 8-wide, masked channels still take up slots,
+          * and are just not written to.
+          */
+         if (dispatch_16)
+            response_length = len * 2;
       }
    }
 
@@ -2383,20 +1604,20 @@ void brw_SAMPLE(struct brw_compile *p,
       insn = next_insn(p, BRW_OPCODE_SEND);
       insn->header.predicate_control = 0; /* XXX */
       insn->header.compression_control = BRW_COMPRESSION_NONE;
-      if (intel->gen < 6)
-         insn->header.destreg__conditionalmod = msg_reg_nr;
+      if (p->gen < 6)
+          insn->header.destreg__conditionalmod = msg_reg_nr;
 
       brw_set_dest(p, insn, dest);
       brw_set_src0(p, insn, src0);
       brw_set_sampler_message(p, insn,
-                             binding_table_index,
-                             sampler,
-                             msg_type,
-                             response_length, 
-                             msg_length,
-                             header_present,
-                             simd_mode,
-                             return_format);
+                              binding_table_index,
+                              sampler,
+                              msg_type,
+                              response_length, 
+                              msg_length,
+                              header_present,
+                              simd_mode,
+                              return_format);
    }
 
    if (need_stall) {
@@ -2407,238 +1628,9 @@ void brw_SAMPLE(struct brw_compile *p,
       brw_push_insn_state(p);
       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
       brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
-             retype(reg, BRW_REGISTER_TYPE_UD));
+              retype(reg, BRW_REGISTER_TYPE_UD));
       brw_pop_insn_state(p);
    }
 
 }
 
-/* All these variables are pretty confusing - we might be better off
- * using bitmasks and macros for this, in the old style.  Or perhaps
- * just having the caller instantiate the fields in dword3 itself.
- */
-void brw_urb_WRITE(struct brw_compile *p,
-                  struct brw_reg dest,
-                  GLuint msg_reg_nr,
-                  struct brw_reg src0,
-                  bool allocate,
-                  bool used,
-                  GLuint msg_length,
-                  GLuint response_length,
-                  bool eot,
-                  bool writes_complete,
-                  GLuint offset,
-                  GLuint swizzle)
-{
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn;
-
-   gen6_resolve_implied_move(p, &src0, msg_reg_nr);
-
-   if (intel->gen == 7) {
-      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
-      brw_push_insn_state(p);
-      brw_set_access_mode(p, BRW_ALIGN_1);
-      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
-                      BRW_REGISTER_TYPE_UD),
-               retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
-               brw_imm_ud(0xff00));
-      brw_pop_insn_state(p);
-   }
-
-   insn = next_insn(p, BRW_OPCODE_SEND);
-
-   assert(msg_length < BRW_MAX_MRF);
-
-   brw_set_dest(p, insn, dest);
-   brw_set_src0(p, insn, src0);
-   brw_set_src1(p, insn, brw_imm_d(0));
-
-   if (intel->gen < 6)
-      insn->header.destreg__conditionalmod = msg_reg_nr;
-
-   brw_set_urb_message(p,
-                      insn,
-                      allocate,
-                      used,
-                      msg_length,
-                      response_length, 
-                      eot, 
-                      writes_complete, 
-                      offset,
-                      swizzle);
-}
-
-static int
-brw_find_next_block_end(struct brw_compile *p, int start)
-{
-   int ip;
-
-   for (ip = start + 1; ip < p->nr_insn; ip++) {
-      struct brw_instruction *insn = &p->store[ip];
-
-      switch (insn->header.opcode) {
-      case BRW_OPCODE_ENDIF:
-      case BRW_OPCODE_ELSE:
-      case BRW_OPCODE_WHILE:
-        return ip;
-      }
-   }
-
-   return 0;
-}
-
-/* There is no DO instruction on gen6, so to find the end of the loop
- * we have to see if the loop is jumping back before our start
- * instruction.
- */
-static int
-brw_find_loop_end(struct brw_compile *p, int start)
-{
-   struct intel_context *intel = &p->brw->intel;
-   int ip;
-   int br = 2;
-
-   for (ip = start + 1; ip < p->nr_insn; ip++) {
-      struct brw_instruction *insn = &p->store[ip];
-
-      if (insn->header.opcode == BRW_OPCODE_WHILE) {
-        int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
-                                  : insn->bits3.break_cont.jip;
-        if (ip + jip / br <= start)
-           return ip;
-      }
-   }
-   assert(!"not reached");
-   return start + 1;
-}
-
-/* After program generation, go back and update the UIP and JIP of
- * BREAK, CONT, and HALT instructions to their correct locations.
- */
-void
-brw_set_uip_jip(struct brw_compile *p)
-{
-   struct intel_context *intel = &p->brw->intel;
-   int ip;
-   int br = 2;
-
-   if (intel->gen < 6)
-      return;
-
-   for (ip = 0; ip < p->nr_insn; ip++) {
-      struct brw_instruction *insn = &p->store[ip];
-      int block_end_ip = 0;
-
-      if (insn->header.opcode == BRW_OPCODE_BREAK ||
-         insn->header.opcode == BRW_OPCODE_CONTINUE ||
-         insn->header.opcode == BRW_OPCODE_HALT) {
-        block_end_ip = brw_find_next_block_end(p, ip);
-      }
-
-      switch (insn->header.opcode) {
-      case BRW_OPCODE_BREAK:
-        assert(block_end_ip != 0);
-        insn->bits3.break_cont.jip = br * (block_end_ip - ip);
-        /* Gen7 UIP points to WHILE; Gen6 points just after it */
-        insn->bits3.break_cont.uip =
-           br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
-        break;
-      case BRW_OPCODE_CONTINUE:
-        assert(block_end_ip != 0);
-        insn->bits3.break_cont.jip = br * (block_end_ip - ip);
-        insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
-
-        assert(insn->bits3.break_cont.uip != 0);
-        assert(insn->bits3.break_cont.jip != 0);
-        break;
-      case BRW_OPCODE_HALT:
-        /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
-         *
-         *    "In case of the halt instruction not inside any conditional code
-         *     block, the value of <JIP> and <UIP> should be the same. In case
-         *     of the halt instruction inside conditional code block, the <UIP>
-         *     should be the end of the program, and the <JIP> should be end of
-         *     the most inner conditional code block."
-         *
-         * The uip will have already been set by whoever set up the
-         * instruction.
-         */
-        if (block_end_ip == 0) {
-           insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
-        } else {
-           insn->bits3.break_cont.jip = br * (block_end_ip - ip);
-        }
-        assert(insn->bits3.break_cont.uip != 0);
-        assert(insn->bits3.break_cont.jip != 0);
-        break;
-      }
-   }
-}
-
-void brw_ff_sync(struct brw_compile *p,
-                  struct brw_reg dest,
-                  GLuint msg_reg_nr,
-                  struct brw_reg src0,
-                  bool allocate,
-                  GLuint response_length,
-                  bool eot)
-{
-   struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn;
-
-   gen6_resolve_implied_move(p, &src0, msg_reg_nr);
-
-   insn = next_insn(p, BRW_OPCODE_SEND);
-   brw_set_dest(p, insn, dest);
-   brw_set_src0(p, insn, src0);
-   brw_set_src1(p, insn, brw_imm_d(0));
-
-   if (intel->gen < 6)
-      insn->header.destreg__conditionalmod = msg_reg_nr;
-
-   brw_set_ff_sync_message(p,
-                          insn,
-                          allocate,
-                          response_length,
-                          eot);
-}
-
-/**
- * Emit the SEND instruction necessary to generate stream output data on Gen6
- * (for transform feedback).
- *
- * If send_commit_msg is true, this is the last piece of stream output data
- * from this thread, so send the data as a committed write.  According to the
- * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
- *
- *   "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
- *   writes are complete by sending the final write as a committed write."
- */
-void
-brw_svb_write(struct brw_compile *p,
-              struct brw_reg dest,
-              GLuint msg_reg_nr,
-              struct brw_reg src0,
-              GLuint binding_table_index,
-              bool   send_commit_msg)
-{
-   struct brw_instruction *insn;
-
-   gen6_resolve_implied_move(p, &src0, msg_reg_nr);
-
-   insn = next_insn(p, BRW_OPCODE_SEND);
-   brw_set_dest(p, insn, dest);
-   brw_set_src0(p, insn, src0);
-   brw_set_src1(p, insn, brw_imm_d(0));
-   brw_set_dp_write_message(p, insn,
-                            binding_table_index,
-                            0, /* msg_control: ignored */
-                            GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
-                            1, /* msg_length */
-                            true, /* header_present */
-                            0, /* last_render_target: ignored */
-                            send_commit_msg, /* response_length */
-                            0, /* end_of_thread */
-                            send_commit_msg); /* send_commit_msg */
-}
diff --git a/backend/src/gen/brw_structs.h b/backend/src/gen/brw_structs.h
new file mode 100644 (file)
index 0000000..ca9071a
--- /dev/null
@@ -0,0 +1,758 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+#ifndef BRW_STRUCTS_H
+#define BRW_STRUCTS_H
+
+#include <stdint.h>
+
+/** Number of general purpose registers (VS, WM, etc) */
+#define BRW_MAX_GRF 128
+
+/**
+ * First GRF used for the MRF hack.
+ *
+ * On gen7, MRFs are no longer used, and contiguous GRFs are used instead.  We
+ * haven't converted our compiler to be aware of this, so it asks for MRFs and
+ * brw_eu_emit.c quietly converts them to be accesses of the top GRFs.  The
+ * register allocators have to be careful of this to avoid corrupting the "MRF"s
+ * with actual GRF allocations.
+ */
+#define GEN7_MRF_HACK_START 112 /* must be an integer constant: 112 + BRW_MAX_MRF (16) == BRW_MAX_GRF (128) */
+
+/** Number of message register file registers */
+#define BRW_MAX_MRF 16
+
+struct brw_urb_immediate { /* packed bitfields; the widths below total 32 bits (one dword) */
+   uint32_t opcode:4;
+   uint32_t offset:6;
+   uint32_t swizzle_control:2; 
+   uint32_t pad:1;
+   uint32_t allocate:1;
+   uint32_t used:1;
+   uint32_t complete:1; /* opcode..complete add up to 16 bits */
+   uint32_t response_length:4; /* from here on: same names, widths and order as bits3.generic in struct brw_instruction */
+   uint32_t msg_length:4;
+   uint32_t msg_target:4;
+   uint32_t pad1:3;
+   uint32_t end_of_thread:1;
+};
+
+struct brw_sampler_state /* four packed bitfield groups ss0..ss3, 32 bits each; NOTE(review): presumably the pre-gen7 counterpart of gen7_sampler_state - confirm */
+{
+   struct
+   {
+      uint32_t shadow_function:3;
+      uint32_t lod_bias:11;
+      uint32_t min_filter:3;
+      uint32_t mag_filter:3;
+      uint32_t mip_filter:2;
+      uint32_t base_level:5;
+      uint32_t min_mag_neq:1;
+      uint32_t lod_preclamp:1;
+      uint32_t default_color_mode:1;
+      uint32_t pad0:1;
+      uint32_t disable:1;
+   } ss0; /* 3+11+3+3+2+5+1+1+1+1+1 = 32 bits */
+
+   struct
+   {
+      uint32_t r_wrap_mode:3;
+      uint32_t t_wrap_mode:3;
+      uint32_t s_wrap_mode:3;
+      uint32_t cube_control_mode:1;
+      uint32_t pad:2;
+      uint32_t max_lod:10;
+      uint32_t min_lod:10;
+   } ss1; /* 3+3+3+1+2+10+10 = 32 bits */
+
+   struct
+   {
+      uint32_t pad:5;
+      uint32_t default_color_pointer:27; /* NOTE(review): 27 bits next to 5 pad bits looks like a 32-byte-aligned address with its low bits dropped - confirm */
+   } ss2; /* 5+27 = 32 bits */
+
+   struct
+   {
+      uint32_t non_normalized_coord:1;
+      uint32_t pad:12;
+      uint32_t address_round:6;
+      uint32_t max_aniso:3;
+      uint32_t chroma_key_mode:1;
+      uint32_t chroma_key_index:2;
+      uint32_t chroma_key_enable:1;
+      uint32_t monochrome_filter_width:3;
+      uint32_t monochrome_filter_height:3;
+   } ss3; /* 1+12+6+3+1+2+1+3+3 = 32 bits */
+};
+
+struct gen7_sampler_state /* four packed bitfield groups ss0..ss3, 32 bits each; field placement differs from brw_sampler_state */
+{
+   struct
+   {
+      uint32_t aniso_algorithm:1;
+      uint32_t lod_bias:13; /* 13 bits here versus 11 in brw_sampler_state */
+      uint32_t min_filter:3;
+      uint32_t mag_filter:3;
+      uint32_t mip_filter:2;
+      uint32_t base_level:5;
+      uint32_t pad1:1;
+      uint32_t lod_preclamp:1;
+      uint32_t default_color_mode:1;
+      uint32_t pad0:1;
+      uint32_t disable:1;
+   } ss0; /* 1+13+3+3+2+5+1+1+1+1+1 = 32 bits */
+
+   struct
+   {
+      uint32_t cube_control_mode:1;
+      uint32_t shadow_function:3; /* lives in ss0 in brw_sampler_state */
+      uint32_t pad:4;
+      uint32_t max_lod:12; /* 12 bits here versus 10 in brw_sampler_state */
+      uint32_t min_lod:12;
+   } ss1; /* 1+3+4+12+12 = 32 bits */
+
+   struct
+   {
+      uint32_t pad:5;
+      uint32_t default_color_pointer:27; /* NOTE(review): 27 bits next to 5 pad bits looks like a 32-byte-aligned address with its low bits dropped - confirm */
+   } ss2; /* 5+27 = 32 bits */
+
+   struct
+   {
+      uint32_t r_wrap_mode:3; /* the three wrap modes live in ss1 in brw_sampler_state */
+      uint32_t t_wrap_mode:3;
+      uint32_t s_wrap_mode:3;
+      uint32_t pad:1;
+      uint32_t non_normalized_coord:1;
+      uint32_t trilinear_quality:2;
+      uint32_t address_round:6;
+      uint32_t max_aniso:3;
+      uint32_t chroma_key_mode:1;
+      uint32_t chroma_key_index:2;
+      uint32_t chroma_key_enable:1;
+      uint32_t pad0:6;
+   } ss3; /* 3+3+3+1+1+2+6+3+1+2+1+6 = 32 bits */
+};
+
+/* Instruction format for the execution units */
+struct brw_instruction
+{
+   struct
+   {
+      uint32_t opcode:7;
+      uint32_t pad:1;
+      uint32_t access_mode:1;
+      uint32_t mask_control:1;
+      uint32_t dependency_control:2;
+      uint32_t compression_control:2; /* gen6: quarter control */
+      uint32_t thread_control:2;
+      uint32_t predicate_control:4;
+      uint32_t predicate_inverse:1;
+      uint32_t execution_size:3;
+      /**
+       * Conditional Modifier for most instructions.  On Gen6+, this is also
+       * used for the SEND instruction's Message Target/SFID.
+       */
+      uint32_t destreg__conditionalmod:4;
+      uint32_t acc_wr_control:1;
+      uint32_t cmpt_control:1;
+      uint32_t debug_control:1;
+      uint32_t saturate:1;
+   } header;
+
+   union {
+      struct
+      {
+         uint32_t dest_reg_file:2;
+         uint32_t dest_reg_type:3;
+         uint32_t src0_reg_file:2;
+         uint32_t src0_reg_type:3;
+         uint32_t src1_reg_file:2;
+         uint32_t src1_reg_type:3;
+         uint32_t pad:1;
+         uint32_t dest_subreg_nr:5;
+         uint32_t dest_reg_nr:8;
+         uint32_t dest_horiz_stride:2;
+         uint32_t dest_address_mode:1;
+      } da1;
+
+      struct
+      {
+         uint32_t dest_reg_file:2;
+         uint32_t dest_reg_type:3;
+         uint32_t src0_reg_file:2;
+         uint32_t src0_reg_type:3;
+         uint32_t src1_reg_file:2;        /* 0x00000c00 */
+         uint32_t src1_reg_type:3;        /* 0x00007000 */
+         uint32_t pad:1;
+         int dest_indirect_offset:10;        /* offset against the deref'd address reg */
+         uint32_t dest_subreg_nr:3; /* subnr for the address reg a0.x */
+         uint32_t dest_horiz_stride:2;
+         uint32_t dest_address_mode:1;
+      } ia1;
+
+      struct
+      {
+         uint32_t dest_reg_file:2;
+         uint32_t dest_reg_type:3;
+         uint32_t src0_reg_file:2;
+         uint32_t src0_reg_type:3;
+         uint32_t src1_reg_file:2;
+         uint32_t src1_reg_type:3;
+         uint32_t pad:1;
+         uint32_t dest_writemask:4;
+         uint32_t dest_subreg_nr:1;
+         uint32_t dest_reg_nr:8;
+         uint32_t dest_horiz_stride:2;
+         uint32_t dest_address_mode:1;
+      } da16;
+
+      struct
+      {
+         uint32_t dest_reg_file:2;
+         uint32_t dest_reg_type:3;
+         uint32_t src0_reg_file:2;
+         uint32_t src0_reg_type:3;
+         uint32_t pad0:6;
+         uint32_t dest_writemask:4;
+         int dest_indirect_offset:6;
+         uint32_t dest_subreg_nr:3;
+         uint32_t dest_horiz_stride:2;
+         uint32_t dest_address_mode:1;
+      } ia16;
+
+      struct {
+         uint32_t dest_reg_file:2;
+         uint32_t dest_reg_type:3;
+         uint32_t src0_reg_file:2;
+         uint32_t src0_reg_type:3;
+         uint32_t src1_reg_file:2;
+         uint32_t src1_reg_type:3;
+         uint32_t pad:1;
+
+         int jump_count:16;
+      } branch_gen6;
+
+      struct {
+         uint32_t dest_reg_file:1;
+         uint32_t flag_subreg_num:1;
+         uint32_t pad0:2;
+         uint32_t src0_abs:1;
+         uint32_t src0_negate:1;
+         uint32_t src1_abs:1;
+         uint32_t src1_negate:1;
+         uint32_t src2_abs:1;
+         uint32_t src2_negate:1;
+         uint32_t pad1:7;
+         uint32_t dest_writemask:4;
+         uint32_t dest_subreg_nr:3;
+         uint32_t dest_reg_nr:8;
+      } da3src;
+   } bits1;
+
+
+   union {
+      struct
+      {
+         uint32_t src0_subreg_nr:5;
+         uint32_t src0_reg_nr:8;
+         uint32_t src0_abs:1;
+         uint32_t src0_negate:1;
+         uint32_t src0_address_mode:1;
+         uint32_t src0_horiz_stride:2;
+         uint32_t src0_width:3;
+         uint32_t src0_vert_stride:4;
+         uint32_t flag_reg_nr:1;
+         uint32_t pad:6;
+      } da1;
+
+      struct
+      {
+         int src0_indirect_offset:10;
+         uint32_t src0_subreg_nr:3;
+         uint32_t src0_abs:1;
+         uint32_t src0_negate:1;
+         uint32_t src0_address_mode:1;
+         uint32_t src0_horiz_stride:2;
+         uint32_t src0_width:3;
+         uint32_t src0_vert_stride:4;
+         uint32_t flag_reg_nr:1;
+         uint32_t pad:6;
+      } ia1;
+
+      struct
+      {
+         uint32_t src0_swz_x:2;
+         uint32_t src0_swz_y:2;
+         uint32_t src0_subreg_nr:1;
+         uint32_t src0_reg_nr:8;
+         uint32_t src0_abs:1;
+         uint32_t src0_negate:1;
+         uint32_t src0_address_mode:1;
+         uint32_t src0_swz_z:2;
+         uint32_t src0_swz_w:2;
+         uint32_t pad0:1;
+         uint32_t src0_vert_stride:4;
+         uint32_t flag_reg_nr:1;
+         uint32_t pad1:6;
+      } da16;
+
+      struct
+      {
+         uint32_t src0_swz_x:2;
+         uint32_t src0_swz_y:2;
+         int src0_indirect_offset:6;
+         uint32_t src0_subreg_nr:3;
+         uint32_t src0_abs:1;
+         uint32_t src0_negate:1;
+         uint32_t src0_address_mode:1;
+         uint32_t src0_swz_z:2;
+         uint32_t src0_swz_w:2;
+         uint32_t pad0:1;
+         uint32_t src0_vert_stride:4;
+         uint32_t flag_reg_nr:1;
+         uint32_t pad1:6;
+      } ia16;
+
+      /* Extended Message Descriptor for Ironlake (Gen5) SEND instruction.
+       *
+       * Does not apply to Gen6+.  The SFID/message target moved to bits
+       * 27:24 of the header (destreg__conditionalmod); EOT is in bits3.
+       */
+       struct
+       {
+           uint32_t pad:26;
+           uint32_t end_of_thread:1;
+           uint32_t pad1:1;
+           uint32_t sfid:4;
+       } send_gen5;  /* for Ironlake only */
+
+      struct {
+         uint32_t src0_rep_ctrl:1;
+         uint32_t src0_swizzle:8;
+         uint32_t src0_subreg_nr:3;
+         uint32_t src0_reg_nr:8;
+         uint32_t pad0:1;
+         uint32_t src1_rep_ctrl:1;
+         uint32_t src1_swizzle:8;
+         uint32_t src1_subreg_nr_low:2;
+      } da3src;
+   } bits2;
+
+   union
+   {
+      struct
+      {
+         uint32_t src1_subreg_nr:5;
+         uint32_t src1_reg_nr:8;
+         uint32_t src1_abs:1;
+         uint32_t src1_negate:1;
+         uint32_t src1_address_mode:1;
+         uint32_t src1_horiz_stride:2;
+         uint32_t src1_width:3;
+         uint32_t src1_vert_stride:4;
+         uint32_t pad0:7;
+      } da1;
+
+      struct
+      {
+         uint32_t src1_swz_x:2;
+         uint32_t src1_swz_y:2;
+         uint32_t src1_subreg_nr:1;
+         uint32_t src1_reg_nr:8;
+         uint32_t src1_abs:1;
+         uint32_t src1_negate:1;
+         uint32_t src1_address_mode:1;
+         uint32_t src1_swz_z:2;
+         uint32_t src1_swz_w:2;
+         uint32_t pad1:1;
+         uint32_t src1_vert_stride:4;
+         uint32_t pad2:7;
+      } da16;
+
+      struct
+      {
+         int  src1_indirect_offset:10;
+         uint32_t src1_subreg_nr:3;
+         uint32_t src1_abs:1;
+         uint32_t src1_negate:1;
+         uint32_t src1_address_mode:1;
+         uint32_t src1_horiz_stride:2;
+         uint32_t src1_width:3;
+         uint32_t src1_vert_stride:4;
+         uint32_t flag_reg_nr:1;
+         uint32_t pad1:6;
+      } ia1;
+
+      struct
+      {
+         uint32_t src1_swz_x:2;
+         uint32_t src1_swz_y:2;
+         int  src1_indirect_offset:6;
+         uint32_t src1_subreg_nr:3;
+         uint32_t src1_abs:1;
+         uint32_t src1_negate:1;
+         uint32_t pad0:1;
+         uint32_t src1_swz_z:2;
+         uint32_t src1_swz_w:2;
+         uint32_t pad1:1;
+         uint32_t src1_vert_stride:4;
+         uint32_t flag_reg_nr:1;
+         uint32_t pad2:6;
+      } ia16;
+
+
+      struct
+      {
+         int  jump_count:16;        /* note: signed */
+         uint32_t  pop_count:4;
+         uint32_t  pad0:12;
+      } if_else;
+
+      /* This is also used for gen7 IF/ELSE instructions */
+      struct
+      {
+         /* Signed jump distance to the ip to jump to if all channels
+          * are disabled after the break or continue.  It should point
+          * to the end of the innermost control flow block, as that's
+          * where some channel could get re-enabled.
+          */
+         int jip:16;
+
+         /* Signed jump distance to the location to resume execution
+          * of this channel if it's enabled for the break or continue.
+          */
+         int uip:16;
+      } break_cont;
+
+      /**
+       * \defgroup SEND instructions / Message Descriptors
+       *
+       * @{
+       */
+
+      /**
+       * Generic Message Descriptor for Gen4 SEND instructions.  The structs
+       * below expand function_control to something specific for their
+       * message.  Due to struct packing issues, they duplicate these bits.
+       *
+       * See the G45 PRM, Volume 4, Table 14-15.
+       */
+      struct {
+         uint32_t function_control:16;
+         uint32_t response_length:4;
+         uint32_t msg_length:4;
+         uint32_t msg_target:4;
+         uint32_t pad1:3;
+         uint32_t end_of_thread:1;
+      } generic;
+
+      /**
+       * Generic Message Descriptor for Gen5-7 SEND instructions.
+       *
+       * See the Sandybridge PRM, Volume 2 Part 2, Table 8-15.  (Sadly, most
+       * of the information on the SEND instruction is missing from the public
+       * Ironlake PRM.)
+       *
+       * The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies.
+       * According to the SEND instruction description:
+       * "The MSb of the message description, the EOT field, always comes from
+       *  bit 127 of the instruction word"...which is bit 31 of this field.
+       */
+      struct {
+         uint32_t function_control:19;
+         uint32_t header_present:1;
+         uint32_t response_length:5;
+         uint32_t msg_length:4;
+         uint32_t pad1:2;
+         uint32_t end_of_thread:1;
+      } generic_gen5;
+
+      /** G45 PRM, Volume 4, Section 6.1.1.1 */
+      struct {
+         uint32_t function:4;
+         uint32_t int_type:1;
+         uint32_t precision:1;
+         uint32_t saturate:1;
+         uint32_t data_type:1;
+         uint32_t pad0:8;
+         uint32_t response_length:4;
+         uint32_t msg_length:4;
+         uint32_t msg_target:4;
+         uint32_t pad1:3;
+         uint32_t end_of_thread:1;
+      } math;
+
+      /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
+      struct {
+         uint32_t function:4;
+         uint32_t int_type:1;
+         uint32_t precision:1;
+         uint32_t saturate:1;
+         uint32_t data_type:1;
+         uint32_t snapshot:1;
+         uint32_t pad0:10;
+         uint32_t header_present:1;
+         uint32_t response_length:5;
+         uint32_t msg_length:4;
+         uint32_t pad1:2;
+         uint32_t end_of_thread:1;
+      } math_gen5;
+
+      /** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */
+      struct {
+         uint32_t binding_table_index:8;
+         uint32_t sampler:4;
+         uint32_t return_format:2;
+         uint32_t msg_type:2;
+         uint32_t response_length:4;
+         uint32_t msg_length:4;
+         uint32_t msg_target:4;
+         uint32_t pad1:3;
+         uint32_t end_of_thread:1;
+      } sampler;
+
+      /** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */
+      struct {
+         uint32_t binding_table_index:8;
+         uint32_t sampler:4;
+         uint32_t msg_type:4;
+         uint32_t response_length:4;
+         uint32_t msg_length:4;
+         uint32_t msg_target:4;
+         uint32_t pad1:3;
+         uint32_t end_of_thread:1;
+      } sampler_g4x;
+
+      /** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */
+      struct {
+         uint32_t binding_table_index:8;
+         uint32_t sampler:4;
+         uint32_t msg_type:4;
+         uint32_t simd_mode:2;
+         uint32_t pad0:1;
+         uint32_t header_present:1;
+         uint32_t response_length:5;
+         uint32_t msg_length:4;
+         uint32_t pad1:2;
+         uint32_t end_of_thread:1;
+      } sampler_gen5;
+
+      struct {
+         uint32_t binding_table_index:8;
+         uint32_t sampler:4;
+         uint32_t msg_type:5;
+         uint32_t simd_mode:2;
+         uint32_t header_present:1;
+         uint32_t response_length:5;
+         uint32_t msg_length:4;
+         uint32_t pad1:2;
+         uint32_t end_of_thread:1;
+      } sampler_gen7;
+
+      struct brw_urb_immediate urb;
+
+      struct {
+         uint32_t opcode:4;
+         uint32_t offset:6;
+         uint32_t swizzle_control:2;
+         uint32_t pad:1;
+         uint32_t allocate:1;
+         uint32_t used:1;
+         uint32_t complete:1;
+         uint32_t pad0:3;
+         uint32_t header_present:1;
+         uint32_t response_length:5;
+         uint32_t msg_length:4;
+         uint32_t pad1:2;
+         uint32_t end_of_thread:1;
+      } urb_gen5;
+
+      struct {
+         uint32_t opcode:3;
+         uint32_t offset:11;
+         uint32_t swizzle_control:1;
+         uint32_t complete:1;
+         uint32_t per_slot_offset:1;
+         uint32_t pad0:2;
+         uint32_t header_present:1;
+         uint32_t response_length:5;
+         uint32_t msg_length:4;
+         uint32_t pad1:2;
+         uint32_t end_of_thread:1;
+      } urb_gen7;
+
+      /** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */
+      struct {
+         uint32_t binding_table_index:8;
+         uint32_t msg_control:4;
+         uint32_t msg_type:2;
+         uint32_t target_cache:2;
+         uint32_t response_length:4;
+         uint32_t msg_length:4;
+         uint32_t msg_target:4;
+         uint32_t pad1:3;
+         uint32_t end_of_thread:1;
+      } dp_read;
+
+      /** G45 PRM, Volume 4, Section 5.10.1.1.2 */
+      struct {
+         uint32_t binding_table_index:8;
+         uint32_t msg_control:3;
+         uint32_t msg_type:3;
+         uint32_t target_cache:2;
+         uint32_t response_length:4;
+         uint32_t msg_length:4;
+         uint32_t msg_target:4;
+         uint32_t pad1:3;
+         uint32_t end_of_thread:1;
+      } dp_read_g4x;
+
+      /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
+      struct {
+         uint32_t binding_table_index:8;
+         uint32_t msg_control:3;
+         uint32_t msg_type:3;
+         uint32_t target_cache:2;
+         uint32_t pad0:3;
+         uint32_t header_present:1;
+         uint32_t response_length:5;
+         uint32_t msg_length:4;
+         uint32_t pad1:2;
+         uint32_t end_of_thread:1;
+      } dp_read_gen5;
+
+      /** G45 PRM, Volume 4, Section 5.10.1.1.2.  For both Gen4 and G45. */
+      struct {
+         uint32_t binding_table_index:8;
+         uint32_t msg_control:3;
+         uint32_t last_render_target:1;
+         uint32_t msg_type:3;
+         uint32_t send_commit_msg:1;
+         uint32_t response_length:4;
+         uint32_t msg_length:4;
+         uint32_t msg_target:4;
+         uint32_t pad1:3;
+         uint32_t end_of_thread:1;
+      } dp_write;
+
+      /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
+      struct {
+         uint32_t binding_table_index:8;
+         uint32_t msg_control:3;
+         uint32_t last_render_target:1;
+         uint32_t msg_type:3;
+         uint32_t send_commit_msg:1;
+         uint32_t pad0:3;
+         uint32_t header_present:1;
+         uint32_t response_length:5;
+         uint32_t msg_length:4;
+         uint32_t pad1:2;
+         uint32_t end_of_thread:1;
+      } dp_write_gen5;
+
+      /**
+       * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
+       *
+       * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
+       **/
+      struct {
+         uint32_t binding_table_index:8;
+         uint32_t msg_control:5;
+         uint32_t msg_type:3;
+         uint32_t pad0:3;
+         uint32_t header_present:1;
+         uint32_t response_length:5;
+         uint32_t msg_length:4;
+         uint32_t pad1:2;
+         uint32_t end_of_thread:1;
+      } gen6_dp_sampler_const_cache;
+
+      /**
+       * Message for the Sandybridge Render Cache Data Port.
+       *
+       * Most fields are defined in the Sandybridge PRM, Volume 4 Part 1,
+       * Section 3.9.2.1.1: Message Descriptor.
+       *
+       * "Slot Group Select" and "Last Render Target" are part of the
+       * 5-bit message control for Render Target Write messages.  See
+       * Section 3.9.9.2.1 of the same volume.
+       */
+      struct {
+         uint32_t binding_table_index:8;
+         uint32_t msg_control:3;
+         uint32_t slot_group_select:1;
+         uint32_t last_render_target:1;
+         uint32_t msg_type:4;
+         uint32_t send_commit_msg:1;
+         uint32_t pad0:1;
+         uint32_t header_present:1;
+         uint32_t response_length:5;
+         uint32_t msg_length:4;
+         uint32_t pad1:2;
+         uint32_t end_of_thread:1;
+      } gen6_dp;
+
+      /**
+       * Message for any of the Gen7 Data Port caches.
+       *
+       * Most fields are defined in BSpec volume 5c.2 Data Port / Messages /
+       * Data Port Messages / Message Descriptor.  Once again, "Slot Group
+       * Select" and "Last Render Target" are part of the 6-bit message
+       * control for Render Target Writes.
+       */
+      struct {
+         uint32_t binding_table_index:8;
+         uint32_t msg_control:3;
+         uint32_t slot_group_select:1;
+         uint32_t last_render_target:1;
+         uint32_t msg_control_pad:1;
+         uint32_t msg_type:4;
+         uint32_t pad1:1;
+         uint32_t header_present:1;
+         uint32_t response_length:5;
+         uint32_t msg_length:4;
+         uint32_t pad2:2;
+         uint32_t end_of_thread:1;
+      } gen7_dp;
+      /** @} */
+
+      struct {
+         uint32_t src1_subreg_nr_high:1;
+         uint32_t src1_reg_nr:8;
+         uint32_t pad0:1;
+         uint32_t src2_rep_ctrl:1;
+         uint32_t src2_swizzle:8;
+         uint32_t src2_subreg_nr:3;
+         uint32_t src2_reg_nr:8;
+         uint32_t pad1:2;
+      } da3src;
+
+      int d;
+      uint32_t ud;
+      float f;
+   } bits3;
+};
+
+#endif
diff --git a/backend/src/gen/program.cpp b/backend/src/gen/program.cpp
new file mode 100644 (file)
index 0000000..324c206
--- /dev/null
@@ -0,0 +1,70 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+/**
+ * \file program.cpp
+ * \author Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+#include "gen/program.h"
+#include "gen/program.hpp"
+#include "ir/liveness.hpp"
+#include "ir/value.hpp"
+#include "ir/unit.hpp"
+#include "llvm/llvm_to_gen.hpp"
+
+namespace gbe {
+namespace gen {
+
+  /*! Build an empty kernel: no argument array, no instruction stream and no
+   *  analysis object attached yet. Initializers follow the member declaration
+   *  order
+   */
+  Kernel::Kernel(void)
+    : args(NULL),
+      insns(NULL),
+      argNum(0),
+      insnNum(0),
+      liveness(NULL),
+      dag(NULL)
+  {
+  }
+  /*! Hand back everything the kernel points to: first the two arrays
+   *  (instructions, arguments), then the liveness and DAG objects
+   */
+  Kernel::~Kernel(void) {
+    GBE_SAFE_DELETE_ARRAY(insns);
+    GBE_SAFE_DELETE_ARRAY(args);
+    GBE_SAFE_DELETE(liveness);
+    GBE_SAFE_DELETE(dag);
+  }
+
+  /*! Nothing to set up explicitly: the kernel map is default-constructed */
+  Program::Program(void) {}
+  /*! Walk the kernel map and delete every kernel stored in it */
+  Program::~Program(void) {
+    auto kernelIt = kernels.begin();
+    while (kernelIt != kernels.end()) {
+      GBE_DELETE(kernelIt->second);
+      ++kernelIt;
+    }
+  }
+
+  /*! Building from an OCL source string is not written yet: the function
+   *  invokes NOT_IMPLEMENTED and then returns false. Neither source nor error
+   *  is read or written
+   */
+  bool Program::buildFromSource(const char *source, std::string &error) {
+    NOT_IMPLEMENTED;
+    return false;
+  }
+  /*! Convert the given LLVM file into a Gen IR unit (llvmToGen), then build
+   *  the program from that unit (buildFromUnit).
+   *  \param fileName file handed to llvmToGen
+   *  \param error set to "<fileName> not found" when llvmToGen fails; also
+   *         passed on to buildFromUnit
+   *  \return false when llvmToGen fails or when buildFromUnit fails
+   */
+  bool Program::buildFromLLVMFile(const char *fileName, std::string &error) {
+    ir::Unit unit;
+    if (llvmToGen(unit, fileName) == false) {
+      error = std::string(fileName) + " not found";
+      return false;
+    }
+    // Forward the build status: a unit that could not be compiled must not be
+    // reported to the caller as a success
+    return this->buildFromUnit(unit, error);
+  }
+  /*! Placeholder: no code generation happens here yet. The unit and the error
+   *  string are left untouched and the function always returns false
+   */
+  bool Program::buildFromUnit(const ir::Unit &unit, std::string &error) {
+    return false;
+  }
+
+} /* namespace gen */
+} /* namespace gbe */
+
diff --git a/backend/src/gen/program.h b/backend/src/gen/program.h
new file mode 100644 (file)
index 0000000..bddcc76
--- /dev/null
@@ -0,0 +1,87 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+/**
+ * \file program.h
+ * \author Benjamin Segovia <benjamin.segovia@intel.com>
+ *
+ * C-like interface for the gen kernels and programs
+ */
+
+#ifndef __GBE_GEN_PROGRAM_H__
+#define __GBE_GEN_PROGRAM_H__
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*! Opaque structure that interfaces a Gen program */
+typedef struct GenProgram GenProgram;
+
+/*! Opaque structure that interfaces a Gen kernel (ie one OCL function) */
+typedef struct GenKernel GenKernel;
+
+/*! Argument type for each function call */
+enum GenArgType {
+  GEN_ARG_VALUE = 0,            // int, float and so on
+  GEN_ARG_GLOBAL_PTR = 1,       // __global, __constant
+  GEN_ARG_STRUCTURE = 2,        // By value structure
+  GEN_ARG_IMAGE = 3,            // image2d_t, image3d_t
+  GEN_ARG_INVALID = 0xffffffff, // Not a valid argument type
+  GEN_ARG_INVALUE = GEN_ARG_INVALID // Original (misspelled) name, same value
+};
+
+/*! Create a new program from the given source code (zero terminated string) */
+GenProgram *GenProgramNewFromSource(const char *source);
+
+/*! Create a new program from the given blob */
+GenProgram *GenProgramNewFromBinary(const char *binary, size_t size);
+
+/*! Destroy and deallocate the given program */
+void GenProgramDelete(GenProgram *program);
+
+/*! Get the number of functions in the program */
+uint32_t GenProgramGetKernelNum(const GenProgram *program);
+
+/*! Get the kernel from its name. GenKernel is an opaque (incomplete) type, so
+ *  it can only be handed out by pointer; the GenKernel* accessors declared in
+ *  this header take that pointer
+ */
+const GenKernel *GenProgramGetKernel(const GenProgram *program, const char *name);
+
+/*! Get the Gen ISA source code */
+const char *GenKernelGetCode(const GenKernel *kernel);
+
+/*! Get the size of the source code */
+const size_t GenKernelGetCodeSize(const GenKernel *kernel);
+
+/*! Get the total number of arguments */
+uint32_t GenKernelGetArgNum(const GenKernel *kernel);
+
+/*! Get the size of the given argument */
+uint32_t GenKernelGetArgSize(const GenKernel *kernel, uint32_t argID);
+
+/*! Get the type of the given argument. The "enum" keyword is spelled out so
+ *  the declaration also parses as C, where GenArgType alone is not a type name
+ */
+enum GenArgType GenKernelGetArgType(const GenKernel *kernel, uint32_t argID);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __GBE_GEN_PROGRAM_H__ */
+
diff --git a/backend/src/gen/program.hpp b/backend/src/gen/program.hpp
new file mode 100644 (file)
index 0000000..654e025
--- /dev/null
@@ -0,0 +1,90 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+/**
+ * \file program.hpp
+ * \author Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+#ifndef __GBE_GEN_PROGRAM_HPP__
+#define __GBE_GEN_PROGRAM_HPP__
+
+#include "gen/program.h"
+#include "gen/brw_structs.h"
+#include "sys/hash_map.hpp"
+#include <string>
+
+namespace gbe {
+namespace ir {
+
+  class Unit;        // Compilation unit. Contains the program to compile
+  class Liveness;    // Describes liveness of each ir function register
+  class FunctionDAG; // Describes the instruction dependencies
+
+} /* namespace ir */
+} /* namespace gbe */
+
+namespace gbe {
+namespace gen {
+
+  /*! Describes one argument of a kernel: what kind it is and how big it is */
+  struct KernelArgument
+  {
+    GenArgType type; //!< Pointer, structure, regular value?
+    size_t size;     //!< Size of this argument (presumably in bytes -- TODO confirm)
+  };
+
+  /*! Describe a compiled kernel */
+  struct Kernel : public NonCopyable
+  {
+    /*! Create an empty kernel (the constructor takes no name) */
+    Kernel(void);
+    /*! Destroy it */
+    ~Kernel(void);
+
+    std::string name;        //!< Kernel name
+    KernelArgument *args;    //!< Each argument (array of argNum entries -- TODO confirm)
+    brw_instruction *insns;  //!< Instruction stream (array of insnNum entries -- TODO confirm)
+    uint32_t argNum;         //!< Number of function arguments
+    uint32_t insnNum;        //!< Number of instructions
+    ir::Liveness *liveness;  //!< Used only for the build
+    ir::FunctionDAG *dag;    //!< Used only for the build
+    GBE_STRUCT(Kernel);      //!< Use gbe allocators
+  };
+
+  /*! Describe a compiled program */
+  struct Program : public NonCopyable
+  {
+    /*! Create an empty program */
+    Program(void);
+    /*! Destroy the program */
+    ~Program(void);
+    /*! Build a program from an ir::Unit */
+    bool buildFromUnit(const ir::Unit &unit, std::string &error);
+    /*! Builds a program from an LLVM source file */
+    bool buildFromLLVMFile(const char *fileName, std::string &error);
+    /*! Builds a program from an OCL string */
+    bool buildFromSource(const char *source, std::string &error);
+    /*! Kernels indexed by their name (hash map, so not ordered) */
+    hash_map<std::string, Kernel*> kernels;
+  };
+
+} /* namespace gen */
+} /* namespace gbe */
+
+#endif /* __GBE_GEN_PROGRAM_HPP__ */
+
index 6cf73bc..802aed2 100644 (file)
@@ -35,7 +35,7 @@
 
 namespace gbe
 {
-  void llvmToGen(ir::Unit &unit, const char *fileName)
+  bool llvmToGen(ir::Unit &unit, const char *fileName)
   {
     using namespace llvm;
     // Get the global LLVM context
@@ -45,7 +45,7 @@ namespace gbe
     SMDiagnostic Err;
     std::auto_ptr<Module> M;
     M.reset(ParseIRFile(fileName, Err, c));
-    GBE_ASSERT (M.get() != 0);
+    if (M.get() == 0) return false;
     Module &mod = *M.get();
 
     llvm::PassManager passes;
@@ -58,6 +58,7 @@ namespace gbe
     passes.add(createGVNPass());                  // Remove redundancies
     passes.add(createGenPass(unit));
     passes.run(mod);
+    return true;
   }
 } /* namespace gbe */
 
index 84fa559..4006667 100644 (file)
@@ -31,7 +31,7 @@ namespace gbe {
   } /* namespace ir */
 
   /*! Convert the LLVM IR code to a GEN IR code */
-  void llvmToGen(ir::Unit &unit, const char *fileName);
+  bool llvmToGen(ir::Unit &unit, const char *fileName);
 
 } /* namespace gbe */