# set (VISIBILITY_FLAG "-fvisibility=hidden")
if (COMPILER STREQUAL "GCC")
- set (CMAKE_CXX_FLAGS "-Wstrict-aliasing=2 -Wno-invalid-offsetof -fstrict-aliasing -msse2 -ffast-math -fPIC -Wall -fno-rtti -std=c++0x")
+ set (CMAKE_C_CXX_FLAGS "-Wstrict-aliasing=2 -fstrict-aliasing -msse2 -ffast-math -fPIC -Wall")
+ set (CMAKE_CXX_FLAGS "${CMAKE_C_CXX_FLAGS} -Wno-invalid-offsetof -fno-rtti -std=c++0x")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${VISIBILITY_FLAG} -Wl,-E")
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
set (CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG -DGBE_DEBUG=0")
set (CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -DGBE_DEBUG=0")
+ set (CMAKE_C_FLAGS "${CMAKE_C_CXX_FLAGS}")
+ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}")
+ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}")
+ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${VISIBILITY_FLAG} -Wl,-E")
+ set (CMAKE_C_FLAGS_DEBUG "-g -DGBE_DEBUG=1")
+ set (CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
+ set (CMAKE_C_FLAGS_MINSIZEREL "-Os -DNDEBUG -DGBE_DEBUG=0")
+ set (CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG -DGBE_DEBUG=0")
elseif (COMPILER STREQUAL "CLANG")
set (CMAKE_C_COMPILER "clang")
set (CMAKE_C_FLAGS "-Wall -std=c99")
--- /dev/null
+%.ll : %.cl Makefile stdlib.h
+ ./compile.sh $<
+
+all: add.ll\
+ add2.ll\
+ cmp.ll\
+ cmp_cvt.ll\
+ complex_struct.ll\
+ cycle.ll\
+ extract.ll\
+ function.ll\
+ function_param.ll\
+ get_global_id.ll\
+ insert.ll\
+ load_store.ll\
+ loop.ll\
+ loop2.ll\
+ loop3.ll\
+ loop4.ll\
+ loop5.ll\
+ select.ll\
+ short.ll\
+ shuffle.ll\
+ simple_float4.ll\
+ simple_float4_2.ll\
+ simple_float4_3.ll\
+ store.ll\
+ struct.ll\
+ struct2.ll\
+ test_select.ll\
+ undefined.ll\
+ vector_constant.ll\
+ void.ll
+
ir/function.hpp
ir/value.cpp
ir/value.hpp
- gen/brw_disasm.c)
+ gen/program.cpp
+ gen/brw_disasm.c
+ gen/brw_eu_emit.c
+ gen/brw_eu.c)
if (GBE_COMPILE_UTESTS)
set (GBE_SRC
--- /dev/null
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+ /*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#define PCI_CHIP_I810 0x7121
+#define PCI_CHIP_I810_DC100 0x7123
+#define PCI_CHIP_I810_E 0x7125
+#define PCI_CHIP_I815 0x1132
+
+#define PCI_CHIP_I830_M 0x3577
+#define PCI_CHIP_845_G 0x2562
+#define PCI_CHIP_I855_GM 0x3582
+#define PCI_CHIP_I865_G 0x2572
+
+#define PCI_CHIP_I915_G 0x2582
+#define PCI_CHIP_E7221_G 0x258A
+#define PCI_CHIP_I915_GM 0x2592
+#define PCI_CHIP_I945_G 0x2772
+#define PCI_CHIP_I945_GM 0x27A2
+#define PCI_CHIP_I945_GME 0x27AE
+
+#define PCI_CHIP_Q35_G 0x29B2
+#define PCI_CHIP_G33_G 0x29C2
+#define PCI_CHIP_Q33_G 0x29D2
+
+#define PCI_CHIP_IGD_GM 0xA011
+#define PCI_CHIP_IGD_G 0xA001
+
+#define IS_IGDGM(devid) (devid == PCI_CHIP_IGD_GM)
+#define IS_IGDG(devid) (devid == PCI_CHIP_IGD_G)
+#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid))
+
+#define PCI_CHIP_I965_G 0x29A2
+#define PCI_CHIP_I965_Q 0x2992
+#define PCI_CHIP_I965_G_1 0x2982
+#define PCI_CHIP_I946_GZ 0x2972
+#define PCI_CHIP_I965_GM 0x2A02
+#define PCI_CHIP_I965_GME 0x2A12
+
+#define PCI_CHIP_GM45_GM 0x2A42
+
+#define PCI_CHIP_IGD_E_G 0x2E02
+#define PCI_CHIP_Q45_G 0x2E12
+#define PCI_CHIP_G45_G 0x2E22
+#define PCI_CHIP_G41_G 0x2E32
+#define PCI_CHIP_B43_G 0x2E42
+#define PCI_CHIP_B43_G1 0x2E92
+
+#define PCI_CHIP_ILD_G 0x0042
+#define PCI_CHIP_ILM_G 0x0046
+
+#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* Desktop */
+#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
+#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
+#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* Mobile */
+#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
+#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
+#define PCI_CHIP_SANDYBRIDGE_S 0x010A /* Server */
+
+#define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* Desktop */
+#define PCI_CHIP_IVYBRIDGE_GT2 0x0162
+#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* Mobile */
+#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166
+#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */
+
+#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \
+ devid == PCI_CHIP_I915_GM || \
+ devid == PCI_CHIP_I945_GM || \
+ devid == PCI_CHIP_I945_GME || \
+ devid == PCI_CHIP_I965_GM || \
+ devid == PCI_CHIP_I965_GME || \
+ devid == PCI_CHIP_GM45_GM || \
+ IS_IGD(devid) || \
+ devid == PCI_CHIP_ILM_G)
+
+#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
+ devid == PCI_CHIP_Q45_G || \
+ devid == PCI_CHIP_G45_G || \
+ devid == PCI_CHIP_G41_G || \
+ devid == PCI_CHIP_B43_G || \
+ devid == PCI_CHIP_B43_G1)
+#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
+#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
+
+#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G)
+#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G)
+#define IS_GEN5(devid) (IS_ILD(devid) || IS_ILM(devid))
+
+#define IS_915(devid) (devid == PCI_CHIP_I915_G || \
+ devid == PCI_CHIP_E7221_G || \
+ devid == PCI_CHIP_I915_GM)
+
+#define IS_945(devid) (devid == PCI_CHIP_I945_G || \
+ devid == PCI_CHIP_I945_GM || \
+ devid == PCI_CHIP_I945_GME || \
+ devid == PCI_CHIP_G33_G || \
+ devid == PCI_CHIP_Q33_G || \
+ devid == PCI_CHIP_Q35_G || IS_IGD(devid))
+
+#define IS_GEN4(devid) (devid == PCI_CHIP_I965_G || \
+ devid == PCI_CHIP_I965_Q || \
+ devid == PCI_CHIP_I965_G_1 || \
+ devid == PCI_CHIP_I965_GM || \
+ devid == PCI_CHIP_I965_GME || \
+ devid == PCI_CHIP_I946_GZ || \
+ IS_G4X(devid))
+
+/* Compat macro for intel_decode.c */
+#define IS_IRONLAKE(devid) IS_GEN5(devid)
+
+#define IS_SNB_GT1(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
+ devid == PCI_CHIP_SANDYBRIDGE_S)
+
+#define IS_SNB_GT2(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
+ devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS)
+
+#define IS_GEN6(devid) (IS_SNB_GT1(devid) || IS_SNB_GT2(devid))
+
+#define IS_IVB_GT1(devid) (devid == PCI_CHIP_IVYBRIDGE_GT1 || \
+ devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \
+ devid == PCI_CHIP_IVYBRIDGE_S_GT1)
+
+#define IS_IVB_GT2(devid) (devid == PCI_CHIP_IVYBRIDGE_GT2 || \
+ devid == PCI_CHIP_IVYBRIDGE_M_GT2)
+
+#define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || IS_IVB_GT2(devid))
+
+#define IS_GEN7(devid) IS_IVYBRIDGE(devid)
+
+#define IS_965(devid) (IS_GEN4(devid) || \
+ IS_G4X(devid) || \
+ IS_GEN5(devid) || \
+ IS_GEN6(devid) || \
+ IS_GEN7(devid))
+
+#define IS_9XX(devid) (IS_915(devid) || \
+ IS_945(devid) || \
+ IS_965(devid))
+
+#define IS_GEN3(devid) (IS_915(devid) || \
+ IS_945(devid))
+
+#define IS_GEN2(devid) (devid == PCI_CHIP_I830_M || \
+ devid == PCI_CHIP_845_G || \
+ devid == PCI_CHIP_I855_GM || \
+ devid == PCI_CHIP_I865_G)
--- /dev/null
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#define INTEL_MASK(high, low) (((1<<((high)-(low)+1))-1)<<(low))
+#define SET_FIELD(value, field) (((value) << field ## _SHIFT) & field ## _MASK)
+#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)
+
+#ifndef BRW_DEFINES_H
+#define BRW_DEFINES_H
+
+/* 3D state:
+ */
+#define PIPE_CONTROL_NOWRITE 0x00
+#define PIPE_CONTROL_WRITEIMMEDIATE 0x01
+#define PIPE_CONTROL_WRITEDEPTH 0x02
+#define PIPE_CONTROL_WRITETIMESTAMP 0x03
+
+#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
+#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01
+
+#define CMD_3D_PRIM 0x7b00 /* 3DPRIMITIVE */
+/* DW0 */
+# define GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT 10
+# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15)
+# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15)
+/* DW1 */
+# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
+# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8)
+
+#define _3DPRIM_POINTLIST 0x01
+#define _3DPRIM_LINELIST 0x02
+#define _3DPRIM_LINESTRIP 0x03
+#define _3DPRIM_TRILIST 0x04
+#define _3DPRIM_TRISTRIP 0x05
+#define _3DPRIM_TRIFAN 0x06
+#define _3DPRIM_QUADLIST 0x07
+#define _3DPRIM_QUADSTRIP 0x08
+#define _3DPRIM_LINELIST_ADJ 0x09
+#define _3DPRIM_LINESTRIP_ADJ 0x0A
+#define _3DPRIM_TRILIST_ADJ 0x0B
+#define _3DPRIM_TRISTRIP_ADJ 0x0C
+#define _3DPRIM_TRISTRIP_REVERSE 0x0D
+#define _3DPRIM_POLYGON 0x0E
+#define _3DPRIM_RECTLIST 0x0F
+#define _3DPRIM_LINELOOP 0x10
+#define _3DPRIM_POINTLIST_BF 0x11
+#define _3DPRIM_LINESTRIP_CONT 0x12
+#define _3DPRIM_LINESTRIP_BF 0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+
+#define BRW_ANISORATIO_2 0
+#define BRW_ANISORATIO_4 1
+#define BRW_ANISORATIO_6 2
+#define BRW_ANISORATIO_8 3
+#define BRW_ANISORATIO_10 4
+#define BRW_ANISORATIO_12 5
+#define BRW_ANISORATIO_14 6
+#define BRW_ANISORATIO_16 7
+
+#define BRW_BLENDFACTOR_ONE 0x1
+#define BRW_BLENDFACTOR_SRC_COLOR 0x2
+#define BRW_BLENDFACTOR_SRC_ALPHA 0x3
+#define BRW_BLENDFACTOR_DST_ALPHA 0x4
+#define BRW_BLENDFACTOR_DST_COLOR 0x5
+#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
+#define BRW_BLENDFACTOR_CONST_COLOR 0x7
+#define BRW_BLENDFACTOR_CONST_ALPHA 0x8
+#define BRW_BLENDFACTOR_SRC1_COLOR 0x9
+#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A
+#define BRW_BLENDFACTOR_ZERO 0x11
+#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12
+#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13
+#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14
+#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15
+#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17
+#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18
+#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19
+#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
+
+#define BRW_BLENDFUNCTION_ADD 0
+#define BRW_BLENDFUNCTION_SUBTRACT 1
+#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2
+#define BRW_BLENDFUNCTION_MIN 3
+#define BRW_BLENDFUNCTION_MAX 4
+
+#define BRW_ALPHATEST_FORMAT_UNORM8 0
+#define BRW_ALPHATEST_FORMAT_FLOAT32 1
+
+#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0
+#define BRW_CHROMAKEY_REPLACE_BLACK 1
+
+#define BRW_CLIP_API_OGL 0
+#define BRW_CLIP_API_DX 1
+
+#define BRW_CLIPMODE_NORMAL 0
+#define BRW_CLIPMODE_CLIP_ALL 1
+#define BRW_CLIPMODE_CLIP_NON_REJECTED 2
+#define BRW_CLIPMODE_REJECT_ALL 3
+#define BRW_CLIPMODE_ACCEPT_ALL 4
+#define BRW_CLIPMODE_KERNEL_CLIP 5
+
+#define BRW_CLIP_NDCSPACE 0
+#define BRW_CLIP_SCREENSPACE 1
+
+#define BRW_COMPAREFUNCTION_ALWAYS 0
+#define BRW_COMPAREFUNCTION_NEVER 1
+#define BRW_COMPAREFUNCTION_LESS 2
+#define BRW_COMPAREFUNCTION_EQUAL 3
+#define BRW_COMPAREFUNCTION_LEQUAL 4
+#define BRW_COMPAREFUNCTION_GREATER 5
+#define BRW_COMPAREFUNCTION_NOTEQUAL 6
+#define BRW_COMPAREFUNCTION_GEQUAL 7
+
+#define BRW_COVERAGE_PIXELS_HALF 0
+#define BRW_COVERAGE_PIXELS_1 1
+#define BRW_COVERAGE_PIXELS_2 2
+#define BRW_COVERAGE_PIXELS_4 3
+
+#define BRW_CULLMODE_BOTH 0
+#define BRW_CULLMODE_NONE 1
+#define BRW_CULLMODE_FRONT 2
+#define BRW_CULLMODE_BACK 3
+
+#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0
+#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
+
+#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
+#define BRW_DEPTHFORMAT_D32_FLOAT 1
+#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2
+#define BRW_DEPTHFORMAT_D24_UNORM_X8_UINT 3 /* GEN5 */
+#define BRW_DEPTHFORMAT_D16_UNORM 5
+
+#define BRW_FLOATING_POINT_IEEE_754 0
+#define BRW_FLOATING_POINT_NON_IEEE_754 1
+
+#define BRW_FRONTWINDING_CW 0
+#define BRW_FRONTWINDING_CCW 1
+
+#define BRW_SPRITE_POINT_ENABLE 16
+
+#define BRW_INDEX_BYTE 0
+#define BRW_INDEX_WORD 1
+#define BRW_INDEX_DWORD 2
+
+#define BRW_LOGICOPFUNCTION_CLEAR 0
+#define BRW_LOGICOPFUNCTION_NOR 1
+#define BRW_LOGICOPFUNCTION_AND_INVERTED 2
+#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3
+#define BRW_LOGICOPFUNCTION_AND_REVERSE 4
+#define BRW_LOGICOPFUNCTION_INVERT 5
+#define BRW_LOGICOPFUNCTION_XOR 6
+#define BRW_LOGICOPFUNCTION_NAND 7
+#define BRW_LOGICOPFUNCTION_AND 8
+#define BRW_LOGICOPFUNCTION_EQUIV 9
+#define BRW_LOGICOPFUNCTION_NOOP 10
+#define BRW_LOGICOPFUNCTION_OR_INVERTED 11
+#define BRW_LOGICOPFUNCTION_COPY 12
+#define BRW_LOGICOPFUNCTION_OR_REVERSE 13
+#define BRW_LOGICOPFUNCTION_OR 14
+#define BRW_LOGICOPFUNCTION_SET 15
+
+#define BRW_MAPFILTER_NEAREST 0x0
+#define BRW_MAPFILTER_LINEAR 0x1
+#define BRW_MAPFILTER_ANISOTROPIC 0x2
+
+#define BRW_MIPFILTER_NONE 0
+#define BRW_MIPFILTER_NEAREST 1
+#define BRW_MIPFILTER_LINEAR 3
+
+#define BRW_ADDRESS_ROUNDING_ENABLE_U_MAG 0x20
+#define BRW_ADDRESS_ROUNDING_ENABLE_U_MIN 0x10
+#define BRW_ADDRESS_ROUNDING_ENABLE_V_MAG 0x08
+#define BRW_ADDRESS_ROUNDING_ENABLE_V_MIN 0x04
+#define BRW_ADDRESS_ROUNDING_ENABLE_R_MAG 0x02
+#define BRW_ADDRESS_ROUNDING_ENABLE_R_MIN 0x01
+
+#define BRW_POLYGON_FRONT_FACING 0
+#define BRW_POLYGON_BACK_FACING 1
+
+#define BRW_PREFILTER_ALWAYS 0x0
+#define BRW_PREFILTER_NEVER 0x1
+#define BRW_PREFILTER_LESS 0x2
+#define BRW_PREFILTER_EQUAL 0x3
+#define BRW_PREFILTER_LEQUAL 0x4
+#define BRW_PREFILTER_GREATER 0x5
+#define BRW_PREFILTER_NOTEQUAL 0x6
+#define BRW_PREFILTER_GEQUAL 0x7
+
+#define BRW_PROVOKING_VERTEX_0 0
+#define BRW_PROVOKING_VERTEX_1 1
+#define BRW_PROVOKING_VERTEX_2 2
+
+#define BRW_RASTRULE_UPPER_LEFT 0
+#define BRW_RASTRULE_UPPER_RIGHT 1
+/* These are listed as "Reserved, but not seen as useful"
+ * in Intel documentation (page 212, "Point Rasterization Rule",
+ * section 7.4 "SF Pipeline State Summary", of document
+ * "Intel® 965 Express Chipset Family and Intel® G35 Express
+ * Chipset Graphics Controller Programmer's Reference Manual,
+ * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
+ * available at
+ * http://intellinuxgraphics.org/documentation.html
+ * at the time of this writing).
+ *
+ * These appear to be supported on at least some
+ * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT
+ * is useful when using OpenGL to render to a FBO
+ * (which has the pixel coordinate Y orientation inverted
+ * with respect to the normal OpenGL pixel coordinate system).
+ */
+#define BRW_RASTRULE_LOWER_LEFT 2
+#define BRW_RASTRULE_LOWER_RIGHT 3
+
+#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0
+#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1
+#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2
+
+#define BRW_STENCILOP_KEEP 0
+#define BRW_STENCILOP_ZERO 1
+#define BRW_STENCILOP_REPLACE 2
+#define BRW_STENCILOP_INCRSAT 3
+#define BRW_STENCILOP_DECRSAT 4
+#define BRW_STENCILOP_INCR 5
+#define BRW_STENCILOP_DECR 6
+#define BRW_STENCILOP_INVERT 7
+
+/* Surface state DW0 */
+#define BRW_SURFACE_RC_READ_WRITE (1 << 8)
+#define BRW_SURFACE_MIPLAYOUT_SHIFT 10
+#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0
+#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1
+#define BRW_SURFACE_CUBEFACE_ENABLES 0x3f
+#define BRW_SURFACE_BLEND_ENABLED (1 << 13)
+#define BRW_SURFACE_WRITEDISABLE_B_SHIFT 14
+#define BRW_SURFACE_WRITEDISABLE_G_SHIFT 15
+#define BRW_SURFACE_WRITEDISABLE_R_SHIFT 16
+#define BRW_SURFACE_WRITEDISABLE_A_SHIFT 17
+
+#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
+#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001
+#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002
+#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
+#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
+#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005
+#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
+#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
+#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
+#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040
+#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041
+#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042
+#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043
+#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044
+#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045
+#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046
+#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
+#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
+#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082
+#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083
+#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
+#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085
+#define BRW_SURFACEFORMAT_R32G32_SINT 0x086
+#define BRW_SURFACEFORMAT_R32G32_UINT 0x087
+#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
+#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
+#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A
+#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B
+#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C
+#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D
+#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
+#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
+#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090
+#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091
+#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092
+#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
+#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
+#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095
+#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
+#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
+#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
+#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
+#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
+#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
+#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC
+#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD
+#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE
+#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF
+#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
+#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
+#define BRW_SURFACEFORMAT_R32_SINT 0x0D6
+#define BRW_SURFACEFORMAT_R32_UINT 0x0D7
+#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8
+#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
+#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
+#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF
+#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0
+#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1
+#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2
+#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3
+#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4
+#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
+#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
+#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
+#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0
+#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1
+#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2
+#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
+#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
+#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
+#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6
+#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7
+#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8
+#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
+#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106
+#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107
+#define BRW_SURFACEFORMAT_R8G8_SINT 0x108
+#define BRW_SURFACEFORMAT_R8G8_UINT 0x109
+#define BRW_SURFACEFORMAT_R16_UNORM 0x10A
+#define BRW_SURFACEFORMAT_R16_SNORM 0x10B
+#define BRW_SURFACEFORMAT_R16_SINT 0x10C
+#define BRW_SURFACEFORMAT_R16_UINT 0x10D
+#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E
+#define BRW_SURFACEFORMAT_I16_UNORM 0x111
+#define BRW_SURFACEFORMAT_L16_UNORM 0x112
+#define BRW_SURFACEFORMAT_A16_UNORM 0x113
+#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114
+#define BRW_SURFACEFORMAT_I16_FLOAT 0x115
+#define BRW_SURFACEFORMAT_L16_FLOAT 0x116
+#define BRW_SURFACEFORMAT_A16_FLOAT 0x117
+#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118
+#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
+#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C
+#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D
+#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E
+#define BRW_SURFACEFORMAT_R16_USCALED 0x11F
+#define BRW_SURFACEFORMAT_R8_UNORM 0x140
+#define BRW_SURFACEFORMAT_R8_SNORM 0x141
+#define BRW_SURFACEFORMAT_R8_SINT 0x142
+#define BRW_SURFACEFORMAT_R8_UINT 0x143
+#define BRW_SURFACEFORMAT_A8_UNORM 0x144
+#define BRW_SURFACEFORMAT_I8_UNORM 0x145
+#define BRW_SURFACEFORMAT_L8_UNORM 0x146
+#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147
+#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148
+#define BRW_SURFACEFORMAT_R8_SSCALED 0x149
+#define BRW_SURFACEFORMAT_R8_USCALED 0x14A
+#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C
+#define BRW_SURFACEFORMAT_DXT1_RGB_SRGB 0x180
+#define BRW_SURFACEFORMAT_R1_UINT 0x181
+#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
+#define BRW_SURFACEFORMAT_BC1_UNORM 0x186
+#define BRW_SURFACEFORMAT_BC2_UNORM 0x187
+#define BRW_SURFACEFORMAT_BC3_UNORM 0x188
+#define BRW_SURFACEFORMAT_BC4_UNORM 0x189
+#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A
+#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
+#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
+#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
+#define BRW_SURFACEFORMAT_MONO8 0x18E
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
+#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190
+#define BRW_SURFACEFORMAT_DXT1_RGB 0x191
+#define BRW_SURFACEFORMAT_FXT1 0x192
+#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193
+#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194
+#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195
+#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196
+#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
+#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198
+#define BRW_SURFACEFORMAT_BC4_SNORM 0x199
+#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A
+#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C
+#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D
+#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
+#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+#define BRW_SURFACE_FORMAT_SHIFT 18
+#define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18)
+
+#define BRW_SURFACERETURNFORMAT_FLOAT32 0
+#define BRW_SURFACERETURNFORMAT_S1 1
+
+#define BRW_SURFACE_TYPE_SHIFT 29
+#define BRW_SURFACE_TYPE_MASK INTEL_MASK(31, 29)
+#define BRW_SURFACE_1D 0
+#define BRW_SURFACE_2D 1
+#define BRW_SURFACE_3D 2
+#define BRW_SURFACE_CUBE 3
+#define BRW_SURFACE_BUFFER 4
+#define BRW_SURFACE_NULL 7
+
+/* Surface state DW2 */
+#define BRW_SURFACE_HEIGHT_SHIFT 19
+#define BRW_SURFACE_HEIGHT_MASK INTEL_MASK(31, 19)
+#define BRW_SURFACE_WIDTH_SHIFT 6
+#define BRW_SURFACE_WIDTH_MASK INTEL_MASK(18, 6)
+#define BRW_SURFACE_LOD_SHIFT 2
+#define BRW_SURFACE_LOD_MASK INTEL_MASK(5, 2)
+
+/* Surface state DW3 */
+#define BRW_SURFACE_DEPTH_SHIFT 21
+#define BRW_SURFACE_DEPTH_MASK INTEL_MASK(31, 21)
+#define BRW_SURFACE_PITCH_SHIFT 3
+#define BRW_SURFACE_PITCH_MASK INTEL_MASK(19, 3)
+#define BRW_SURFACE_TILED (1 << 1)
+#define BRW_SURFACE_TILED_Y (1 << 0)
+
+/* Surface state DW4 */
+#define BRW_SURFACE_MIN_LOD_SHIFT 28
+#define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28)
+
+/* Surface state DW5 */
+#define BRW_SURFACE_X_OFFSET_SHIFT 25
+#define BRW_SURFACE_X_OFFSET_MASK INTEL_MASK(31, 25)
+#define BRW_SURFACE_VERTICAL_ALIGN_ENABLE (1 << 24)
+#define BRW_SURFACE_Y_OFFSET_SHIFT 20
+#define BRW_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 20)
+
+#define BRW_TEXCOORDMODE_WRAP 0
+#define BRW_TEXCOORDMODE_MIRROR 1
+#define BRW_TEXCOORDMODE_CLAMP 2
+#define BRW_TEXCOORDMODE_CUBE 3
+#define BRW_TEXCOORDMODE_CLAMP_BORDER 4
+#define BRW_TEXCOORDMODE_MIRROR_ONCE 5
+
+#define BRW_THREAD_PRIORITY_NORMAL 0
+#define BRW_THREAD_PRIORITY_HIGH 1
+
+#define BRW_TILEWALK_XMAJOR 0
+#define BRW_TILEWALK_YMAJOR 1
+
+#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0
+#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1
+
+/* Execution Unit (EU) defines
+ */
+
+#define BRW_ALIGN_1 0
+#define BRW_ALIGN_16 1
+
+#define BRW_ADDRESS_DIRECT 0
+#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1
+
+#define BRW_CHANNEL_X 0
+#define BRW_CHANNEL_Y 1
+#define BRW_CHANNEL_Z 2
+#define BRW_CHANNEL_W 3
+
+enum brw_compression {
+ BRW_COMPRESSION_NONE = 0,
+ BRW_COMPRESSION_2NDHALF = 1,
+ BRW_COMPRESSION_COMPRESSED = 2,
+};
+
+#define GEN6_COMPRESSION_1Q 0
+#define GEN6_COMPRESSION_2Q 1
+#define GEN6_COMPRESSION_3Q 2
+#define GEN6_COMPRESSION_4Q 3
+#define GEN6_COMPRESSION_1H 0
+#define GEN6_COMPRESSION_2H 2
+
+#define BRW_CONDITIONAL_NONE 0
+#define BRW_CONDITIONAL_Z 1
+#define BRW_CONDITIONAL_NZ 2
+#define BRW_CONDITIONAL_EQ 1 /* Z */
+#define BRW_CONDITIONAL_NEQ 2 /* NZ */
+#define BRW_CONDITIONAL_G 3
+#define BRW_CONDITIONAL_GE 4
+#define BRW_CONDITIONAL_L 5
+#define BRW_CONDITIONAL_LE 6
+#define BRW_CONDITIONAL_R 7
+#define BRW_CONDITIONAL_O 8
+#define BRW_CONDITIONAL_U 9
+
+#define BRW_DEBUG_NONE 0
+#define BRW_DEBUG_BREAKPOINT 1
+
+#define BRW_DEPENDENCY_NORMAL 0
+#define BRW_DEPENDENCY_NOTCLEARED 1
+#define BRW_DEPENDENCY_NOTCHECKED 2
+#define BRW_DEPENDENCY_DISABLE 3
+
+#define BRW_EXECUTE_1 0
+#define BRW_EXECUTE_2 1
+#define BRW_EXECUTE_4 2
+#define BRW_EXECUTE_8 3
+#define BRW_EXECUTE_16 4
+#define BRW_EXECUTE_32 5
+
+#define BRW_HORIZONTAL_STRIDE_0 0
+#define BRW_HORIZONTAL_STRIDE_1 1
+#define BRW_HORIZONTAL_STRIDE_2 2
+#define BRW_HORIZONTAL_STRIDE_4 3
+
+#define BRW_INSTRUCTION_NORMAL 0
+#define BRW_INSTRUCTION_SATURATE 1
+
+#define BRW_MASK_ENABLE 0
+#define BRW_MASK_DISABLE 1
+
+/** @{
+ *
+ * Gen6 has replaced "mask enable/disable" with WECtrl, which is
+ * effectively the same but much simpler to think about. Now, there
+ * are two contributors ANDed together to whether channels are
+ * executed: The predication on the instruction, and the channel write
+ * enable.
+ */
+/**
+ * This is the default value. It means that a channel's write enable is set
+ * if the per-channel IP is pointing at this instruction.
+ */
+#define BRW_WE_NORMAL 0
+/**
+ * This is used like BRW_MASK_DISABLE, and causes all channels to have
+ * their write enable set. Note that predication still contributes to
+ * whether the channel actually gets written.
+ */
+#define BRW_WE_ALL 1
+/** @} */
+
+enum opcode {
+ /* These are the actual hardware opcodes. */
+ BRW_OPCODE_MOV = 1,
+ BRW_OPCODE_SEL = 2,
+ BRW_OPCODE_NOT = 4,
+ BRW_OPCODE_AND = 5,
+ BRW_OPCODE_OR = 6,
+ BRW_OPCODE_XOR = 7,
+ BRW_OPCODE_SHR = 8,
+ BRW_OPCODE_SHL = 9,
+ BRW_OPCODE_RSR = 10,
+ BRW_OPCODE_RSL = 11,
+ BRW_OPCODE_ASR = 12,
+ BRW_OPCODE_CMP = 16,
+ BRW_OPCODE_CMPN = 17,
+ BRW_OPCODE_JMPI = 32,
+ BRW_OPCODE_IF = 34,
+ BRW_OPCODE_IFF = 35,
+ BRW_OPCODE_ELSE = 36,
+ BRW_OPCODE_ENDIF = 37,
+ BRW_OPCODE_DO = 38,
+ BRW_OPCODE_WHILE = 39,
+ BRW_OPCODE_BREAK = 40,
+ BRW_OPCODE_CONTINUE = 41,
+ BRW_OPCODE_HALT = 42,
+ BRW_OPCODE_MSAVE = 44,
+ BRW_OPCODE_MRESTORE = 45,
+ BRW_OPCODE_PUSH = 46,
+ BRW_OPCODE_POP = 47,
+ BRW_OPCODE_WAIT = 48,
+ BRW_OPCODE_SEND = 49,
+ BRW_OPCODE_SENDC = 50,
+ BRW_OPCODE_MATH = 56,
+ BRW_OPCODE_ADD = 64,
+ BRW_OPCODE_MUL = 65,
+ BRW_OPCODE_AVG = 66,
+ BRW_OPCODE_FRC = 67,
+ BRW_OPCODE_RNDU = 68,
+ BRW_OPCODE_RNDD = 69,
+ BRW_OPCODE_RNDE = 70,
+ BRW_OPCODE_RNDZ = 71,
+ BRW_OPCODE_MAC = 72,
+ BRW_OPCODE_MACH = 73,
+ BRW_OPCODE_LZD = 74,
+ BRW_OPCODE_SAD2 = 80,
+ BRW_OPCODE_SADA2 = 81,
+ BRW_OPCODE_DP4 = 84,
+ BRW_OPCODE_DPH = 85,
+ BRW_OPCODE_DP3 = 86,
+ BRW_OPCODE_DP2 = 87,
+ BRW_OPCODE_DPA2 = 88,
+ BRW_OPCODE_LINE = 89,
+ BRW_OPCODE_PLN = 90,
+ BRW_OPCODE_MAD = 91,
+ BRW_OPCODE_NOP = 126,
+
+ /* These are compiler backend opcodes that get translated into other
+ * instructions.
+ */
+ FS_OPCODE_FB_WRITE = 128,
+ SHADER_OPCODE_RCP,
+ SHADER_OPCODE_RSQ,
+ SHADER_OPCODE_SQRT,
+ SHADER_OPCODE_EXP2,
+ SHADER_OPCODE_LOG2,
+ SHADER_OPCODE_POW,
+ SHADER_OPCODE_INT_QUOTIENT,
+ SHADER_OPCODE_INT_REMAINDER,
+ SHADER_OPCODE_SIN,
+ SHADER_OPCODE_COS,
+
+ SHADER_OPCODE_TEX,
+ SHADER_OPCODE_TXD,
+ SHADER_OPCODE_TXF,
+ SHADER_OPCODE_TXL,
+ SHADER_OPCODE_TXS,
+ FS_OPCODE_TXB,
+
+ FS_OPCODE_DDX,
+ FS_OPCODE_DDY,
+ FS_OPCODE_PIXEL_X,
+ FS_OPCODE_PIXEL_Y,
+ FS_OPCODE_CINTERP,
+ FS_OPCODE_LINTERP,
+ FS_OPCODE_DISCARD,
+ FS_OPCODE_SPILL,
+ FS_OPCODE_UNSPILL,
+ FS_OPCODE_PULL_CONSTANT_LOAD,
+
+ VS_OPCODE_URB_WRITE,
+ VS_OPCODE_SCRATCH_READ,
+ VS_OPCODE_SCRATCH_WRITE,
+ VS_OPCODE_PULL_CONSTANT_LOAD,
+};
+
+#define BRW_PREDICATE_NONE 0
+#define BRW_PREDICATE_NORMAL 1
+#define BRW_PREDICATE_ALIGN1_ANYV 2
+#define BRW_PREDICATE_ALIGN1_ALLV 3
+#define BRW_PREDICATE_ALIGN1_ANY2H 4
+#define BRW_PREDICATE_ALIGN1_ALL2H 5
+#define BRW_PREDICATE_ALIGN1_ANY4H 6
+#define BRW_PREDICATE_ALIGN1_ALL4H 7
+#define BRW_PREDICATE_ALIGN1_ANY8H 8
+#define BRW_PREDICATE_ALIGN1_ALL8H 9
+#define BRW_PREDICATE_ALIGN1_ANY16H 10
+#define BRW_PREDICATE_ALIGN1_ALL16H 11
+#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4
+#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5
+#define BRW_PREDICATE_ALIGN16_ANY4H 6
+#define BRW_PREDICATE_ALIGN16_ALL4H 7
+
+#define BRW_ARCHITECTURE_REGISTER_FILE 0
+#define BRW_GENERAL_REGISTER_FILE 1
+#define BRW_MESSAGE_REGISTER_FILE 2
+#define BRW_IMMEDIATE_VALUE 3
+
+#define BRW_REGISTER_TYPE_UD 0
+#define BRW_REGISTER_TYPE_D 1
+#define BRW_REGISTER_TYPE_UW 2
+#define BRW_REGISTER_TYPE_W 3
+#define BRW_REGISTER_TYPE_UB 4
+#define BRW_REGISTER_TYPE_B 5
+#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
+#define BRW_REGISTER_TYPE_HF 6
+#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
+#define BRW_REGISTER_TYPE_F 7
+
+#define BRW_ARF_NULL 0x00
+#define BRW_ARF_ADDRESS 0x10
+#define BRW_ARF_ACCUMULATOR 0x20
+#define BRW_ARF_FLAG 0x30
+#define BRW_ARF_MASK 0x40
+#define BRW_ARF_MASK_STACK 0x50
+#define BRW_ARF_MASK_STACK_DEPTH 0x60
+#define BRW_ARF_STATE 0x70
+#define BRW_ARF_CONTROL 0x80
+#define BRW_ARF_NOTIFICATION_COUNT 0x90
+#define BRW_ARF_IP 0xA0
+
+#define BRW_MRF_COMPR4 (1 << 7)
+
+#define BRW_AMASK 0
+#define BRW_IMASK 1
+#define BRW_LMASK 2
+#define BRW_CMASK 3
+
+
+
+#define BRW_THREAD_NORMAL 0
+#define BRW_THREAD_ATOMIC 1
+#define BRW_THREAD_SWITCH 2
+
+#define BRW_VERTICAL_STRIDE_0 0
+#define BRW_VERTICAL_STRIDE_1 1
+#define BRW_VERTICAL_STRIDE_2 2
+#define BRW_VERTICAL_STRIDE_4 3
+#define BRW_VERTICAL_STRIDE_8 4
+#define BRW_VERTICAL_STRIDE_16 5
+#define BRW_VERTICAL_STRIDE_32 6
+#define BRW_VERTICAL_STRIDE_64 7
+#define BRW_VERTICAL_STRIDE_128 8
+#define BRW_VERTICAL_STRIDE_256 9
+#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
+
+#define BRW_WIDTH_1 0
+#define BRW_WIDTH_2 1
+#define BRW_WIDTH_4 2
+#define BRW_WIDTH_8 3
+#define BRW_WIDTH_16 4
+
+#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0
+#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1
+#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2
+#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3
+#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4
+#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5
+#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6
+#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7
+#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8
+#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9
+#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10
+#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11
+
+#define BRW_POLYGON_FACING_FRONT 0
+#define BRW_POLYGON_FACING_BACK 1
+
+/**
+ * Message target: Shared Function ID for where to SEND a message.
+ *
+ * These are enumerated in the ISA reference under "send - Send Message".
+ * In particular, see the following tables:
+ * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition"
+ * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor"
+ * - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) /
+ * Overview / GPE Function IDs
+ */
+enum brw_message_target {
+ BRW_SFID_NULL = 0,
+ BRW_SFID_MATH = 1, /* Only valid on Gen4-5 */
+ BRW_SFID_SAMPLER = 2,
+ BRW_SFID_MESSAGE_GATEWAY = 3,
+ BRW_SFID_DATAPORT_READ = 4,
+ BRW_SFID_DATAPORT_WRITE = 5,
+ BRW_SFID_URB = 6,
+ BRW_SFID_THREAD_SPAWNER = 7,
+
+ GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4,
+ GEN6_SFID_DATAPORT_RENDER_CACHE = 5,
+ GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,
+
+ GEN7_SFID_DATAPORT_DATA_CACHE = 10,
+};
+
+#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10
+
+#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
+#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
+#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3
+
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE 1
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
+
+#define GEN5_SAMPLER_MESSAGE_SAMPLE 0
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10
+
+/* for GEN5 only */
+#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
+#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
+#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
+#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3
+
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
+#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
+#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
+#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
+
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
+
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+
+/* This one stays the same across generations. */
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+/* GEN4 */
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
+#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+/* G45, GEN5 */
+#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
+#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
+#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3
+#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
+#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
+/* GEN6 */
+#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
+#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5
+#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
+
+#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
+#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
+#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
+
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
+
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
+#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
+#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+
+/* GEN6 */
+#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE 7
+#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8
+#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9
+#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10
+#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11
+#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12
+#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13
+#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14
+
+/* GEN7 */
+#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 10
+
+#define BRW_MATH_FUNCTION_INV 1
+#define BRW_MATH_FUNCTION_LOG 2
+#define BRW_MATH_FUNCTION_EXP 3
+#define BRW_MATH_FUNCTION_SQRT 4
+#define BRW_MATH_FUNCTION_RSQ 5
+#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
+#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
+#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
+#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */
+#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */
+#define BRW_MATH_FUNCTION_POW 10
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
+#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13
+
+#define BRW_MATH_INTEGER_UNSIGNED 0
+#define BRW_MATH_INTEGER_SIGNED 1
+
+#define BRW_MATH_PRECISION_FULL 0
+#define BRW_MATH_PRECISION_PARTIAL 1
+
+#define BRW_MATH_SATURATE_NONE 0
+#define BRW_MATH_SATURATE_SATURATE 1
+
+#define BRW_MATH_DATA_VECTOR 0
+#define BRW_MATH_DATA_SCALAR 1
+
+#define BRW_URB_OPCODE_WRITE 0
+
+#define BRW_URB_SWIZZLE_NONE 0
+#define BRW_URB_SWIZZLE_INTERLEAVE 1
+#define BRW_URB_SWIZZLE_TRANSPOSE 2
+
+#define BRW_SCRATCH_SPACE_SIZE_1K 0
+#define BRW_SCRATCH_SPACE_SIZE_2K 1
+#define BRW_SCRATCH_SPACE_SIZE_4K 2
+#define BRW_SCRATCH_SPACE_SIZE_8K 3
+#define BRW_SCRATCH_SPACE_SIZE_16K 4
+#define BRW_SCRATCH_SPACE_SIZE_32K 5
+#define BRW_SCRATCH_SPACE_SIZE_64K 6
+#define BRW_SCRATCH_SPACE_SIZE_128K 7
+#define BRW_SCRATCH_SPACE_SIZE_256K 8
+#define BRW_SCRATCH_SPACE_SIZE_512K 9
+#define BRW_SCRATCH_SPACE_SIZE_1M 10
+#define BRW_SCRATCH_SPACE_SIZE_2M 11
+
+
+
+
+#define CMD_URB_FENCE 0x6000
+#define CMD_CS_URB_STATE 0x6001
+#define CMD_CONST_BUFFER 0x6002
+
+#define CMD_STATE_BASE_ADDRESS 0x6101
+#define CMD_STATE_SIP 0x6102
+#define CMD_PIPELINE_SELECT_965 0x6104
+#define CMD_PIPELINE_SELECT_GM45 0x6904
+
+#define _3DSTATE_PIPELINED_POINTERS 0x7800
+#define _3DSTATE_BINDING_TABLE_POINTERS 0x7801
+# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8)
+# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9)
+# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12)
+
+#define _3DSTATE_BINDING_TABLE_POINTERS_VS 0x7826 /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POINTERS_HS 0x7827 /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POINTERS_DS 0x7828 /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GEN7+ */
+
+#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */
+# define PS_SAMPLER_STATE_CHANGE (1 << 12)
+# define GS_SAMPLER_STATE_CHANGE (1 << 9)
+# define VS_SAMPLER_STATE_CHANGE (1 << 8)
+/* DW1: VS */
+/* DW2: GS */
+/* DW3: PS */
+
+#define _3DSTATE_SAMPLER_STATE_POINTERS_VS 0x782B /* GEN7+ */
+#define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782E /* GEN7+ */
+#define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782F /* GEN7+ */
+
+#define _3DSTATE_VERTEX_BUFFERS 0x7808
+# define BRW_VB0_INDEX_SHIFT 27
+# define GEN6_VB0_INDEX_SHIFT 26
+# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26)
+# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26)
+# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20)
+# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20)
+# define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14)
+# define BRW_VB0_PITCH_SHIFT 0
+
+#define _3DSTATE_VERTEX_ELEMENTS 0x7809
+# define BRW_VE0_INDEX_SHIFT 27
+# define GEN6_VE0_INDEX_SHIFT 26
+# define BRW_VE0_FORMAT_SHIFT 16
+# define BRW_VE0_VALID (1 << 26)
+# define GEN6_VE0_VALID (1 << 25)
+# define BRW_VE0_SRC_OFFSET_SHIFT 0
+# define BRW_VE1_COMPONENT_NOSTORE 0
+# define BRW_VE1_COMPONENT_STORE_SRC 1
+# define BRW_VE1_COMPONENT_STORE_0 2
+# define BRW_VE1_COMPONENT_STORE_1_FLT 3
+# define BRW_VE1_COMPONENT_STORE_1_INT 4
+# define BRW_VE1_COMPONENT_STORE_VID 5
+# define BRW_VE1_COMPONENT_STORE_IID 6
+# define BRW_VE1_COMPONENT_STORE_PID 7
+# define BRW_VE1_COMPONENT_0_SHIFT 28
+# define BRW_VE1_COMPONENT_1_SHIFT 24
+# define BRW_VE1_COMPONENT_2_SHIFT 20
+# define BRW_VE1_COMPONENT_3_SHIFT 16
+# define BRW_VE1_DST_OFFSET_SHIFT 0
+
+#define CMD_INDEX_BUFFER 0x780a
+#define GEN4_3DSTATE_VF_STATISTICS 0x780b
+#define GM45_3DSTATE_VF_STATISTICS 0x680b
+#define _3DSTATE_CC_STATE_POINTERS 0x780e /* GEN6+ */
+#define _3DSTATE_BLEND_STATE_POINTERS 0x7824 /* GEN7+ */
+#define _3DSTATE_DEPTH_STENCIL_STATE_POINTERS 0x7825 /* GEN7+ */
+
+#define _3DSTATE_URB 0x7805 /* GEN6 */
+# define GEN6_URB_VS_SIZE_SHIFT 16
+# define GEN6_URB_VS_ENTRIES_SHIFT 0
+# define GEN6_URB_GS_ENTRIES_SHIFT 8
+# define GEN6_URB_GS_SIZE_SHIFT 0
+
+#define _3DSTATE_URB_VS 0x7830 /* GEN7+ */
+#define _3DSTATE_URB_HS 0x7831 /* GEN7+ */
+#define _3DSTATE_URB_DS 0x7832 /* GEN7+ */
+#define _3DSTATE_URB_GS 0x7833 /* GEN7+ */
+# define GEN7_URB_ENTRY_SIZE_SHIFT 16
+# define GEN7_URB_STARTING_ADDRESS_SHIFT 25
+
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS 0x7916 /* GEN7+ */
+# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
+
+#define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */
+# define GEN6_CC_VIEWPORT_MODIFY (1 << 12)
+# define GEN6_SF_VIEWPORT_MODIFY (1 << 11)
+# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10)
+
+#define _3DSTATE_VIEWPORT_STATE_POINTERS_CC 0x7823 /* GEN7+ */
+#define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL 0x7821 /* GEN7+ */
+
+#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */
+
+#define _3DSTATE_VS 0x7810 /* GEN6+ */
+/* DW2 */
+# define GEN6_VS_SPF_MODE (1 << 31)
+# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_VS_SAMPLER_COUNT_SHIFT 27
+# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16)
+/* DW4 */
+# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20
+# define GEN6_VS_URB_READ_LENGTH_SHIFT 11
+# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW5 */
+# define GEN6_VS_MAX_THREADS_SHIFT 25
+# define GEN6_VS_STATISTICS_ENABLE (1 << 10)
+# define GEN6_VS_CACHE_DISABLE (1 << 1)
+# define GEN6_VS_ENABLE (1 << 0)
+
+#define _3DSTATE_GS 0x7811 /* GEN6+ */
+/* DW2 */
+# define GEN6_GS_SPF_MODE (1 << 31)
+# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_GS_SAMPLER_COUNT_SHIFT 27
+# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_GS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_GS_FLOATING_POINT_MODE_ALT (1 << 16)
+/* DW4 */
+# define GEN6_GS_URB_READ_LENGTH_SHIFT 11
+# define GEN7_GS_INCLUDE_VERTEX_HANDLES (1 << 10)
+# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4
+# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0
+/* DW5 */
+# define GEN6_GS_MAX_THREADS_SHIFT 25
+# define GEN6_GS_STATISTICS_ENABLE (1 << 10)
+# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9)
+# define GEN6_GS_RENDERING_ENABLE (1 << 8)
+# define GEN7_GS_ENABLE (1 << 0)
+/* DW6 */
+# define GEN6_GS_REORDER (1 << 30)
+# define GEN6_GS_DISCARD_ADJACENCY (1 << 29)
+# define GEN6_GS_SVBI_PAYLOAD_ENABLE (1 << 28)
+# define GEN6_GS_SVBI_POSTINCREMENT_ENABLE (1 << 27)
+# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT 16
+# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16)
+# define GEN6_GS_ENABLE (1 << 15)
+
+# define BRW_GS_EDGE_INDICATOR_0 (1 << 8)
+# define BRW_GS_EDGE_INDICATOR_1 (1 << 9)
+
+#define _3DSTATE_HS 0x781B /* GEN7+ */
+#define _3DSTATE_TE 0x781C /* GEN7+ */
+#define _3DSTATE_DS 0x781D /* GEN7+ */
+
+#define _3DSTATE_CLIP 0x7812 /* GEN6+ */
+/* DW1 */
+# define GEN7_CLIP_WINDING_CW (0 << 20)
+# define GEN7_CLIP_WINDING_CCW (1 << 20)
+# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_8 (0 << 19)
+# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_4 (1 << 19)
+# define GEN7_CLIP_EARLY_CULL (1 << 18)
+# define GEN7_CLIP_CULLMODE_BOTH (0 << 16)
+# define GEN7_CLIP_CULLMODE_NONE (1 << 16)
+# define GEN7_CLIP_CULLMODE_FRONT (2 << 16)
+# define GEN7_CLIP_CULLMODE_BACK (3 << 16)
+# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10)
+/**
+ * Just does cheap culling based on the clip distance. Bits must be
+ * disjoint with USER_CLIP_CLIP_DISTANCE bits.
+ */
+# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0
+/* DW2 */
+# define GEN6_CLIP_ENABLE (1 << 31)
+# define GEN6_CLIP_API_OGL (0 << 30)
+# define GEN6_CLIP_API_D3D (1 << 30)
+# define GEN6_CLIP_XY_TEST (1 << 28)
+# define GEN6_CLIP_Z_TEST (1 << 27)
+# define GEN6_CLIP_GB_TEST (1 << 26)
+/** 8-bit field of which user clip distances to clip aganist. */
+# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16
+# define GEN6_CLIP_MODE_NORMAL (0 << 13)
+# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13)
+# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13)
+# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9)
+# define GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE (1 << 8)
+# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4
+# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2
+# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0
+/* DW3 */
+# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17
+# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6
+# define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5)
+
+#define _3DSTATE_SF 0x7813 /* GEN6+ */
+/* DW1 (for gen6) */
+# define GEN6_SF_NUM_OUTPUTS_SHIFT 22
+# define GEN6_SF_SWIZZLE_ENABLE (1 << 21)
+# define GEN6_SF_POINT_SPRITE_UPPERLEFT (0 << 20)
+# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20)
+# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
+# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW2 */
+# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11)
+# define GEN6_SF_STATISTICS_ENABLE (1 << 10)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7)
+# define GEN6_SF_FRONT_SOLID (0 << 5)
+# define GEN6_SF_FRONT_WIREFRAME (1 << 5)
+# define GEN6_SF_FRONT_POINT (2 << 5)
+# define GEN6_SF_BACK_SOLID (0 << 3)
+# define GEN6_SF_BACK_WIREFRAME (1 << 3)
+# define GEN6_SF_BACK_POINT (2 << 3)
+# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1)
+# define GEN6_SF_WINDING_CCW (1 << 0)
+/* DW3 */
+# define GEN6_SF_LINE_AA_ENABLE (1 << 31)
+# define GEN6_SF_CULL_BOTH (0 << 29)
+# define GEN6_SF_CULL_NONE (1 << 29)
+# define GEN6_SF_CULL_FRONT (2 << 29)
+# define GEN6_SF_CULL_BACK (3 << 29)
+# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */
+# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16)
+# define GEN6_SF_SCISSOR_ENABLE (1 << 11)
+# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8)
+# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8)
+# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8)
+# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8)
+/* DW4 */
+# define GEN6_SF_TRI_PROVOKE_SHIFT 29
+# define GEN6_SF_LINE_PROVOKE_SHIFT 27
+# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25
+# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14)
+# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14)
+# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12)
+# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12)
+# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11)
+# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */
+/* DW5: depth offset constant */
+/* DW6: depth offset scale */
+/* DW7: depth offset clamp */
+/* DW8 */
+# define ATTRIBUTE_1_OVERRIDE_W (1 << 31)
+# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30)
+# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29)
+# define ATTRIBUTE_1_OVERRIDE_X (1 << 28)
+# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25
+# define ATTRIBUTE_1_SWIZZLE_SHIFT 22
+# define ATTRIBUTE_1_SOURCE_SHIFT 16
+# define ATTRIBUTE_0_OVERRIDE_W (1 << 15)
+# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14)
+# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13)
+# define ATTRIBUTE_0_OVERRIDE_X (1 << 12)
+# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9
+# define ATTRIBUTE_0_SWIZZLE_SHIFT 6
+# define ATTRIBUTE_0_SOURCE_SHIFT 0
+
+# define ATTRIBUTE_SWIZZLE_INPUTATTR 0
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3
+# define ATTRIBUTE_SWIZZLE_SHIFT 6
+
+/* DW16: Point sprite texture coordinate enables */
+/* DW17: Constant interpolation enables */
+/* DW18: attr 0-7 wrap shortest enables */
+/* DW19: attr 8-16 wrap shortest enables */
+
+/* On GEN7, many fields of 3DSTATE_SF were split out into a new command:
+ * 3DSTATE_SBE. The remaining fields live in different DWords, but retain
+ * the same bit-offset. The only new field:
+ */
+/* GEN7/DW1: */
+# define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT 12
+
+#define _3DSTATE_SBE 0x781F /* GEN7+ */
+/* DW1 */
+# define GEN7_SBE_SWIZZLE_CONTROL_MODE (1 << 28)
+# define GEN7_SBE_NUM_OUTPUTS_SHIFT 22
+# define GEN7_SBE_SWIZZLE_ENABLE (1 << 21)
+# define GEN7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20)
+# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11
+# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW2-9: Attribute setup (same as DW8-15 of gen6 _3DSTATE_SF) */
+/* DW10: Point sprite texture coordinate enables */
+/* DW11: Constant interpolation enables */
+/* DW12: attr 0-7 wrap shortest enables */
+/* DW13: attr 8-16 wrap shortest enables */
+
+enum brw_wm_barycentric_interp_mode {
+ BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC = 0,
+ BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC = 1,
+ BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC = 2,
+ BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC = 3,
+ BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC = 4,
+ BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC = 5,
+ BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT = 6
+};
+
+#define _3DSTATE_WM 0x7814 /* GEN6+ */
+/* DW1: kernel pointer */
+/* DW2 */
+# define GEN6_WM_SPF_MODE (1 << 31)
+# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_WM_SAMPLER_COUNT_SHIFT 27
+# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16)
+/* DW3: scratch space */
+/* DW4 */
+# define GEN6_WM_STATISTICS_ENABLE (1 << 31)
+# define GEN6_WM_DEPTH_CLEAR (1 << 30)
+# define GEN6_WM_DEPTH_RESOLVE (1 << 28)
+# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0
+/* DW5 */
+# define GEN6_WM_MAX_THREADS_SHIFT 25
+# define GEN6_WM_KILL_ENABLE (1 << 22)
+# define GEN6_WM_COMPUTED_DEPTH (1 << 21)
+# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20)
+# define GEN6_WM_DISPATCH_ENABLE (1 << 19)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16)
+# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14)
+# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13)
+# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11)
+# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9)
+# define GEN6_WM_USES_SOURCE_W (1 << 8)
+# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
+# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2)
+# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1)
+# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0)
+/* DW6 */
+# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20
+# define GEN6_WM_POSOFFSET_NONE (0 << 18)
+# define GEN6_WM_POSOFFSET_CENTROID (2 << 18)
+# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18)
+# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16)
+# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16)
+# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16)
+# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
+# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
+# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
+# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
+# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
+# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
+# define GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 10
+# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9)
+# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1)
+# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1)
+# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1)
+# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1)
+# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0)
+/* DW7: kernel 1 pointer */
+/* DW8: kernel 2 pointer */
+
+#define _3DSTATE_CONSTANT_VS 0x7815 /* GEN6+ */
+#define _3DSTATE_CONSTANT_GS 0x7816 /* GEN6+ */
+#define _3DSTATE_CONSTANT_PS 0x7817 /* GEN6+ */
+# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15)
+# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14)
+# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13)
+# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12)
+
+#define _3DSTATE_CONSTANT_HS 0x7819 /* GEN7+ */
+#define _3DSTATE_CONSTANT_DS 0x781A /* GEN7+ */
+
+#define _3DSTATE_STREAMOUT 0x781e /* GEN7+ */
+/* DW1 */
+# define SO_FUNCTION_ENABLE (1 << 31)
+# define SO_RENDERING_DISABLE (1 << 30)
+/* This selects which incoming rendering stream goes down the pipeline. The
+ * rendering stream is 0 if not defined by special cases in the GS state.
+ */
+# define SO_RENDER_STREAM_SELECT_SHIFT 27
+# define SO_RENDER_STREAM_SELECT_MASK INTEL_MASK(28, 27)
+/* Controls reordering of TRISTRIP_* elements in stream output (not rendering).
+ */
+# define SO_REORDER_TRAILING (1 << 26)
+/* Controls SO_NUM_PRIMS_WRITTEN_* and SO_PRIM_STORAGE_* */
+# define SO_STATISTICS_ENABLE (1 << 25)
+# define SO_BUFFER_ENABLE(n) (1 << (8 + (n)))
+/* DW2 */
+# define SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT 29
+# define SO_STREAM_3_VERTEX_READ_OFFSET_MASK INTEL_MASK(29, 29)
+# define SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT 24
+# define SO_STREAM_3_VERTEX_READ_LENGTH_MASK INTEL_MASK(28, 24)
+# define SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT 21
+# define SO_STREAM_2_VERTEX_READ_OFFSET_MASK INTEL_MASK(21, 21)
+# define SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT 16
+# define SO_STREAM_2_VERTEX_READ_LENGTH_MASK INTEL_MASK(20, 16)
+# define SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT 13
+# define SO_STREAM_1_VERTEX_READ_OFFSET_MASK INTEL_MASK(13, 13)
+# define SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT 8
+# define SO_STREAM_1_VERTEX_READ_LENGTH_MASK INTEL_MASK(12, 8)
+# define SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT 5
+# define SO_STREAM_0_VERTEX_READ_OFFSET_MASK INTEL_MASK(5, 5)
+# define SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT 0
+# define SO_STREAM_0_VERTEX_READ_LENGTH_MASK INTEL_MASK(4, 0)
+
+/* 3DSTATE_WM for Gen7 */
+/* DW1 */
+# define GEN7_WM_STATISTICS_ENABLE (1 << 31)
+# define GEN7_WM_DEPTH_CLEAR (1 << 30)
+# define GEN7_WM_DISPATCH_ENABLE (1 << 29)
+# define GEN7_WM_DEPTH_RESOLVE (1 << 28)
+# define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
+# define GEN7_WM_KILL_ENABLE (1 << 25)
+# define GEN7_WM_PSCDEPTH_OFF (0 << 23)
+# define GEN7_WM_PSCDEPTH_ON (1 << 23)
+# define GEN7_WM_PSCDEPTH_ON_GE (2 << 23)
+# define GEN7_WM_PSCDEPTH_ON_LE (3 << 23)
+# define GEN7_WM_USES_SOURCE_DEPTH (1 << 20)
+# define GEN7_WM_USES_SOURCE_W (1 << 19)
+# define GEN7_WM_POSITION_ZW_PIXEL (0 << 17)
+# define GEN7_WM_POSITION_ZW_CENTROID (2 << 17)
+# define GEN7_WM_POSITION_ZW_SAMPLE (3 << 17)
+# define GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 11
+# define GEN7_WM_USES_INPUT_COVERAGE_MASK (1 << 10)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8)
+# define GEN7_WM_LINE_AA_WIDTH_0_5 (0 << 6)
+# define GEN7_WM_LINE_AA_WIDTH_1_0 (1 << 6)
+# define GEN7_WM_LINE_AA_WIDTH_2_0 (2 << 6)
+# define GEN7_WM_LINE_AA_WIDTH_4_0 (3 << 6)
+# define GEN7_WM_POLYGON_STIPPLE_ENABLE (1 << 4)
+# define GEN7_WM_LINE_STIPPLE_ENABLE (1 << 3)
+# define GEN7_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2)
+# define GEN7_WM_MSRAST_OFF_PIXEL (0 << 0)
+# define GEN7_WM_MSRAST_OFF_PATTERN (1 << 0)
+# define GEN7_WM_MSRAST_ON_PIXEL (2 << 0)
+# define GEN7_WM_MSRAST_ON_PATTERN (3 << 0)
+/* DW2 */
+# define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31)
+
+#define _3DSTATE_PS 0x7820 /* GEN7+ */
+/* DW1: kernel pointer */
+/* DW2 */
+# define GEN7_PS_SPF_MODE (1 << 31)
+# define GEN7_PS_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN7_PS_SAMPLER_COUNT_SHIFT 27
+# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16)
+/* DW3: scratch space */
+/* DW4 */
+# define GEN7_PS_MAX_THREADS_SHIFT 24
+# define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11)
+# define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10)
+# define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9)
+# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
+# define GEN7_PS_POSOFFSET_NONE (0 << 3)
+# define GEN7_PS_POSOFFSET_CENTROID (2 << 3)
+# define GEN7_PS_POSOFFSET_SAMPLE (3 << 3)
+# define GEN7_PS_32_DISPATCH_ENABLE (1 << 2)
+# define GEN7_PS_16_DISPATCH_ENABLE (1 << 1)
+# define GEN7_PS_8_DISPATCH_ENABLE (1 << 0)
+/* DW5 */
+# define GEN7_PS_DISPATCH_START_GRF_SHIFT_0 16
+# define GEN7_PS_DISPATCH_START_GRF_SHIFT_1 8
+# define GEN7_PS_DISPATCH_START_GRF_SHIFT_2 0
+/* DW6: kernel 1 pointer */
+/* DW7: kernel 2 pointer */
+
+#define _3DSTATE_SAMPLE_MASK 0x7818 /* GEN6+ */
+
+#define _3DSTATE_DRAWING_RECTANGLE 0x7900
+#define _3DSTATE_BLEND_CONSTANT_COLOR 0x7901
+#define _3DSTATE_CHROMA_KEY 0x7904
+#define _3DSTATE_DEPTH_BUFFER 0x7905 /* GEN4-6 */
+#define _3DSTATE_POLY_STIPPLE_OFFSET 0x7906
+#define _3DSTATE_POLY_STIPPLE_PATTERN 0x7907
+#define _3DSTATE_LINE_STIPPLE_PATTERN 0x7908
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
+#define _3DSTATE_AA_LINE_PARAMETERS 0x790a /* G45+ */
+
+#define _3DSTATE_GS_SVB_INDEX 0x790b /* CTG+ */
+/* DW1 */
+# define SVB_INDEX_SHIFT 29
+# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */
+/* DW2: SVB index */
+/* DW3: SVB maximum index */
+
+#define _3DSTATE_MULTISAMPLE 0x790d /* GEN6+ */
+/* DW1 */
+# define MS_PIXEL_LOCATION_CENTER (0 << 4)
+# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
+# define MS_NUMSAMPLES_1 (0 << 1)
+# define MS_NUMSAMPLES_4 (2 << 1)
+# define MS_NUMSAMPLES_8 (3 << 1)
+
+#define _3DSTATE_STENCIL_BUFFER 0x790e /* ILK, SNB */
+#define _3DSTATE_HIER_DEPTH_BUFFER 0x790f /* ILK, SNB */
+
+#define GEN7_3DSTATE_CLEAR_PARAMS 0x7804
+#define GEN7_3DSTATE_DEPTH_BUFFER 0x7805
+#define GEN7_3DSTATE_STENCIL_BUFFER 0x7806
+#define GEN7_3DSTATE_HIER_DEPTH_BUFFER 0x7807
+
+#define _3DSTATE_CLEAR_PARAMS 0x7910 /* ILK, SNB */
+# define DEPTH_CLEAR_VALID (1 << 15)
+/* DW1: depth clear value */
+
+#define _3DSTATE_SO_DECL_LIST 0x7917 /* GEN7+ */
+/* DW1 */
+# define SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT 12
+# define SO_STREAM_TO_BUFFER_SELECTS_3_MASK INTEL_MASK(15, 12)
+# define SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT 8
+# define SO_STREAM_TO_BUFFER_SELECTS_2_MASK INTEL_MASK(11, 8)
+# define SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT 4
+# define SO_STREAM_TO_BUFFER_SELECTS_1_MASK INTEL_MASK(7, 4)
+# define SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT 0
+# define SO_STREAM_TO_BUFFER_SELECTS_0_MASK INTEL_MASK(3, 0)
+/* DW2 */
+# define SO_NUM_ENTRIES_3_SHIFT 24
+# define SO_NUM_ENTRIES_3_MASK INTEL_MASK(31, 24)
+# define SO_NUM_ENTRIES_2_SHIFT 16
+# define SO_NUM_ENTRIES_2_MASK INTEL_MASK(23, 16)
+# define SO_NUM_ENTRIES_1_SHIFT 8
+# define SO_NUM_ENTRIES_1_MASK INTEL_MASK(15, 8)
+# define SO_NUM_ENTRIES_0_SHIFT 0
+# define SO_NUM_ENTRIES_0_MASK INTEL_MASK(7, 0)
+
+/* SO_DECL DW0 */
+# define SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT 12
+# define SO_DECL_OUTPUT_BUFFER_SLOT_MASK INTEL_MASK(13, 12)
+# define SO_DECL_HOLE_FLAG (1 << 11)
+# define SO_DECL_REGISTER_INDEX_SHIFT 4
+# define SO_DECL_REGISTER_INDEX_MASK INTEL_MASK(9, 4)
+# define SO_DECL_COMPONENT_MASK_SHIFT 0
+# define SO_DECL_COMPONENT_MASK_MASK INTEL_MASK(3, 0)
+
+#define _3DSTATE_SO_BUFFER 0x7918 /* GEN7+ */
+/* DW1 */
+# define SO_BUFFER_INDEX_SHIFT 29
+# define SO_BUFFER_INDEX_MASK INTEL_MASK(30, 29)
+# define SO_BUFFER_PITCH_SHIFT 0
+# define SO_BUFFER_PITCH_MASK INTEL_MASK(11, 0)
+/* DW2: start address */
+/* DW3: end address. */
+
+#define CMD_PIPE_CONTROL 0x7a00
+
+#define CMD_MI_FLUSH 0x0200
+
+
+/* Bitfields for the URB_WRITE message, DW2 of message header: */
+#define URB_WRITE_PRIM_END 0x1
+#define URB_WRITE_PRIM_START 0x2
+#define URB_WRITE_PRIM_TYPE_SHIFT 2
+
+
+/* Maximum number of entries that can be addressed using a binding table
+ * pointer of type SURFTYPE_BUFFER
+ */
+#define BRW_MAX_NUM_BUFFER_ENTRIES (1 << 27)
+
+#include "brw_chipset.h"
+
+#endif
#include <stdarg.h>
//#include "main/mtypes.h"
-
//#include "brw_context.h"
#include "brw_defines.h"
+#include "brw_structs.h"
+
+#include <stdint.h>
struct {
char *name;
static int src_ia1 (FILE *file,
uint32_t type,
uint32_t _reg_file,
- GLint _addr_imm,
+ int _addr_imm,
uint32_t _addr_subreg_nr,
uint32_t _negate,
uint32_t __abs,
*/
-#include "brw_context.h"
+// #include "brw_context.h"
#include "brw_defines.h"
#include "brw_eu.h"
-#include "glsl/ralloc.h"
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+
+// #include "glsl/ralloc.h"
/* Returns the corresponding conditional mod for swapping src0 and
* src1 in e.g. CMP.
/* How does predicate control work when execution_size != 8? Do I
* need to test/set for 0xffff when execution_size is 16?
*/
-void brw_set_predicate_control_flag_value( struct brw_compile *p, uint32_t value )
+void brw_set_predicate_control_flag_value(struct brw_compile *p, uint32_t value)
{
p->current->header.predicate_control = BRW_PREDICATE_NONE;
}
}
-void brw_set_predicate_control( struct brw_compile *p, uint32_t pc )
+void brw_set_predicate_control(struct brw_compile *p, uint32_t pc)
{
p->current->header.predicate_control = pc;
}
p->current->header.predicate_inverse = predicate_inverse;
}
-void brw_set_conditionalmod( struct brw_compile *p, uint32_t conditional )
+void brw_set_conditionalmod(struct brw_compile *p, uint32_t conditional)
{
p->current->header.destreg__conditionalmod = conditional;
}
-void brw_set_access_mode( struct brw_compile *p, uint32_t access_mode )
+void brw_set_access_mode(struct brw_compile *p, uint32_t access_mode)
{
p->current->header.access_mode = access_mode;
}
{
p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED);
- if (p->brw->intel.gen >= 6) {
+ if (p->gen >= 6) {
/* Since we don't use the 32-wide support in gen6, we translate
* the pre-gen6 compression control here.
*/
}
}
-void brw_set_mask_control( struct brw_compile *p, uint32_t value )
+void brw_set_mask_control(struct brw_compile *p, uint32_t value)
{
p->current->header.mask_control = value;
}
-void brw_set_saturate( struct brw_compile *p, uint32_t value )
+void brw_set_saturate(struct brw_compile *p, uint32_t value)
{
p->current->header.saturate = value;
}
+#if 0
void brw_set_acc_write_control(struct brw_compile *p, uint32_t value)
{
if (p->brw->intel.gen >= 6)
p->current->header.acc_wr_control = value;
}
+#endif
-void brw_push_insn_state( struct brw_compile *p )
+void brw_push_insn_state(struct brw_compile *p)
{
assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
memcpy(p->current+1, p->current, sizeof(struct brw_instruction));
p->current++;
}
-void brw_pop_insn_state( struct brw_compile *p )
+void brw_pop_insn_state(struct brw_compile *p)
{
assert(p->current != p->stack);
p->current--;
}
+#if 0
/***********************************************************************
*/
void
}
-const uint32_t *brw_get_program( struct brw_compile *p,
- uint32_t *sz )
+const uint32_t *brw_get_program(struct brw_compile *p,
+ uint32_t *sz)
{
uint32_t i;
c->first_label = NULL;
}
}
+#endif
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
-
-
#ifndef BRW_EU_H
#define BRW_EU_H
#include <stdbool.h>
+#include <assert.h>
#include "brw_structs.h"
#include "brw_defines.h"
-#include "program/prog_instruction.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
+#define WRITEMASK_X 0x1
+#define WRITEMASK_Y 0x2
+#define WRITEMASK_XY 0x3
+#define WRITEMASK_Z 0x4
+#define WRITEMASK_XZ 0x5
+#define WRITEMASK_YZ 0x6
+#define WRITEMASK_XYZ 0x7
+#define WRITEMASK_W 0x8
+#define WRITEMASK_XW 0x9
+#define WRITEMASK_YW 0xa
+#define WRITEMASK_XYW 0xb
+#define WRITEMASK_ZW 0xc
+#define WRITEMASK_XZW 0xd
+#define WRITEMASK_YZW 0xe
+#define WRITEMASK_XYZW 0xf
+
static inline bool brw_is_single_value_swizzle(int swiz)
{
return (swiz == BRW_SWIZZLE_XXXX ||
- swiz == BRW_SWIZZLE_YYYY ||
- swiz == BRW_SWIZZLE_ZZZZ ||
- swiz == BRW_SWIZZLE_WWWW);
+ swiz == BRW_SWIZZLE_YYYY ||
+ swiz == BRW_SWIZZLE_ZZZZ ||
+ swiz == BRW_SWIZZLE_WWWW);
}
#define REG_SIZE (8*4)
-
/* These aren't hardware structs, just something useful for us to pass around:
*
* Align1 operation has a lot of control over input ranges. Used in
*/
struct brw_reg
{
- GLuint type:4;
- GLuint file:2;
- GLuint nr:8;
- GLuint subnr:5; /* :1 in align16 */
- GLuint negate:1; /* source only */
- GLuint abs:1; /* source only */
- GLuint vstride:4; /* source only */
- GLuint width:3; /* src only, align1 only */
- GLuint hstride:2; /* align1 only */
- GLuint address_mode:1; /* relative addressing, hopefully! */
- GLuint pad0:1;
-
- union {
+ uint32_t type:4;
+ uint32_t file:2;
+ uint32_t nr:8;
+ uint32_t subnr:5; /* :1 in align16 */
+ uint32_t negate:1; /* source only */
+ uint32_t abs:1; /* source only */
+ uint32_t vstride:4; /* source only */
+ uint32_t width:3; /* src only, align1 only */
+ uint32_t hstride:2; /* align1 only */
+ uint32_t address_mode:1; /* relative addressing, hopefully! */
+ uint32_t pad0:1;
+
+ union {
struct {
- GLuint swizzle:8; /* src only, align16 only */
- GLuint writemask:4; /* dest only, align16 only */
- GLint indirect_offset:10; /* relative addressing offset */
- GLuint pad1:10; /* two dwords total */
+ uint32_t swizzle:8; /* src only, align16 only */
+ uint32_t writemask:4; /* dest only, align16 only */
+ int indirect_offset:10; /* relative addressing offset */
+ uint32_t pad1:10; /* two dwords total */
} bits;
- GLfloat f;
- GLint d;
- GLuint ud;
- } dw1;
+ float f;
+ int d;
+ uint32_t ud;
+ } dw1;
};
struct brw_indirect {
- GLuint addr_subnr:4;
- GLint addr_offset:10;
- GLuint pad:18;
+ uint32_t addr_subnr:4;
+ int addr_offset:10;
+ uint32_t pad:18;
};
-
-struct brw_glsl_label;
-struct brw_glsl_call;
-
-
-
#define BRW_EU_MAX_INSN_STACK 5
-
+#define BRW_MAX_INSTRUCTION_NUM 8192
struct brw_compile {
- struct brw_instruction *store;
- int store_size;
- GLuint nr_insn;
-
- void *mem_ctx;
-
- /* Allow clients to push/pop instruction state:
- */
- struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
- bool compressed_stack[BRW_EU_MAX_INSN_STACK];
- struct brw_instruction *current;
-
- GLuint flag_value;
- bool single_program_flow;
- bool compressed;
- struct brw_context *brw;
-
- /* Control flow stacks:
- * - if_stack contains IF and ELSE instructions which must be patched
- * (and popped) once the matching ENDIF instruction is encountered.
- *
- * Just store the instruction pointer(an index).
- */
- int *if_stack;
- int if_stack_depth;
- int if_stack_array_size;
-
- /**
- * loop_stack contains the instruction pointers of the starts of loops which
- * must be patched (and popped) once the matching WHILE instruction is
- * encountered.
- */
- int *loop_stack;
- /**
- * pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF
- * blocks they were popping out of, to fix up the mask stack. This tracks
- * the IF/ENDIF nesting in each current nested loop level.
- */
- int *if_depth_in_loop;
- int loop_stack_depth;
- int loop_stack_array_size;
-
- struct brw_glsl_label *first_label; /**< linked list of labels */
- struct brw_glsl_call *first_call; /**< linked list of CALs */
+ int gen;
+ struct brw_instruction store[8192];
+ int store_size;
+ uint32_t nr_insn;
+
+ /* Allow clients to push/pop instruction state */
+ struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+ bool compressed_stack[BRW_EU_MAX_INSN_STACK];
+ struct brw_instruction *current;
+
+ uint32_t flag_value;
+ bool single_program_flow;
+ bool compressed;
+ struct brw_context *brw;
};
-
void
-brw_save_label(struct brw_compile *c, const char *name, GLuint position);
+brw_save_label(struct brw_compile *c, const char *name, uint32_t position);
void
-brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos);
+brw_save_call(struct brw_compile *c, const char *name, uint32_t call_pos);
void
brw_resolve_cals(struct brw_compile *c);
-
-
-static INLINE int type_sz( GLuint type )
+static inline int type_sz(uint32_t type)
{
- switch( type ) {
+ switch(type) {
case BRW_REGISTER_TYPE_UD:
case BRW_REGISTER_TYPE_D:
case BRW_REGISTER_TYPE_F:
* \param swizzle one of BRW_SWIZZLE_x
* \param writemask WRITEMASK_X/Y/Z/W bitfield
*/
-static INLINE struct brw_reg brw_reg( GLuint file,
- GLuint nr,
- GLuint subnr,
- GLuint type,
- GLuint vstride,
- GLuint width,
- GLuint hstride,
- GLuint swizzle,
- GLuint writemask )
+static inline struct brw_reg brw_reg(uint32_t file,
+ uint32_t nr,
+ uint32_t subnr,
+ uint32_t type,
+ uint32_t vstride,
+ uint32_t width,
+ uint32_t hstride,
+ uint32_t swizzle,
+ uint32_t writemask)
{
struct brw_reg reg;
if (file == BRW_GENERAL_REGISTER_FILE)
}
/** Construct float[16] register */
-static INLINE struct brw_reg brw_vec16_reg( GLuint file,
- GLuint nr,
- GLuint subnr )
+static inline struct brw_reg brw_vec16_reg(uint32_t file,
+ uint32_t nr,
+ uint32_t subnr)
{
return brw_reg(file,
- nr,
- subnr,
- BRW_REGISTER_TYPE_F,
- BRW_VERTICAL_STRIDE_16,
- BRW_WIDTH_16,
- BRW_HORIZONTAL_STRIDE_1,
- BRW_SWIZZLE_XYZW,
- WRITEMASK_XYZW);
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_16,
+ BRW_WIDTH_16,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ WRITEMASK_XYZW);
}
/** Construct float[8] register */
-static INLINE struct brw_reg brw_vec8_reg( GLuint file,
- GLuint nr,
- GLuint subnr )
+static inline struct brw_reg brw_vec8_reg(uint32_t file,
+ uint32_t nr,
+ uint32_t subnr)
{
return brw_reg(file,
- nr,
- subnr,
- BRW_REGISTER_TYPE_F,
- BRW_VERTICAL_STRIDE_8,
- BRW_WIDTH_8,
- BRW_HORIZONTAL_STRIDE_1,
- BRW_SWIZZLE_XYZW,
- WRITEMASK_XYZW);
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_8,
+ BRW_WIDTH_8,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ WRITEMASK_XYZW);
}
/** Construct float[4] register */
-static INLINE struct brw_reg brw_vec4_reg( GLuint file,
- GLuint nr,
- GLuint subnr )
+static inline struct brw_reg brw_vec4_reg(uint32_t file,
+ uint32_t nr,
+ uint32_t subnr)
{
return brw_reg(file,
- nr,
- subnr,
- BRW_REGISTER_TYPE_F,
- BRW_VERTICAL_STRIDE_4,
- BRW_WIDTH_4,
- BRW_HORIZONTAL_STRIDE_1,
- BRW_SWIZZLE_XYZW,
- WRITEMASK_XYZW);
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ WRITEMASK_XYZW);
}
/** Construct float[2] register */
-static INLINE struct brw_reg brw_vec2_reg( GLuint file,
- GLuint nr,
- GLuint subnr )
+static inline struct brw_reg brw_vec2_reg(uint32_t file,
+ uint32_t nr,
+ uint32_t subnr)
{
return brw_reg(file,
- nr,
- subnr,
- BRW_REGISTER_TYPE_F,
- BRW_VERTICAL_STRIDE_2,
- BRW_WIDTH_2,
- BRW_HORIZONTAL_STRIDE_1,
- BRW_SWIZZLE_XYXY,
- WRITEMASK_XY);
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYXY,
+ WRITEMASK_XY);
}
/** Construct float[1] register */
-static INLINE struct brw_reg brw_vec1_reg( GLuint file,
- GLuint nr,
- GLuint subnr )
+static inline struct brw_reg brw_vec1_reg(uint32_t file,
+ uint32_t nr,
+ uint32_t subnr)
{
return brw_reg(file,
- nr,
- subnr,
- BRW_REGISTER_TYPE_F,
- BRW_VERTICAL_STRIDE_0,
- BRW_WIDTH_1,
- BRW_HORIZONTAL_STRIDE_0,
- BRW_SWIZZLE_XXXX,
- WRITEMASK_X);
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XXXX,
+ WRITEMASK_X);
}
-static INLINE struct brw_reg retype( struct brw_reg reg,
- GLuint type )
+static inline struct brw_reg retype(struct brw_reg reg, uint32_t type)
{
reg.type = type;
return reg;
}
-static inline struct brw_reg
-sechalf(struct brw_reg reg)
+static inline struct brw_reg sechalf(struct brw_reg reg)
{
if (reg.vstride)
reg.nr++;
return reg;
}
-static INLINE struct brw_reg suboffset( struct brw_reg reg,
- GLuint delta )
-{
+static inline struct brw_reg suboffset(struct brw_reg reg, uint32_t delta)
+{
reg.subnr += delta * type_sz(reg.type);
return reg;
}
-
-static INLINE struct brw_reg offset( struct brw_reg reg,
- GLuint delta )
+static inline struct brw_reg offset(struct brw_reg reg, uint32_t delta)
{
reg.nr += delta;
return reg;
}
-
-static INLINE struct brw_reg byte_offset( struct brw_reg reg,
- GLuint bytes )
+static inline struct brw_reg byte_offset(struct brw_reg reg, uint32_t bytes)
{
- GLuint newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
+ uint32_t newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
reg.nr = newoffset / REG_SIZE;
reg.subnr = newoffset % REG_SIZE;
return reg;
}
-
+
/** Construct unsigned word[16] register */
-static INLINE struct brw_reg brw_uw16_reg( GLuint file,
- GLuint nr,
- GLuint subnr )
+static inline struct brw_reg brw_uw16_reg(uint32_t file,
+ uint32_t nr,
+ uint32_t subnr)
{
return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
/** Construct unsigned word[8] register */
-static INLINE struct brw_reg brw_uw8_reg( GLuint file,
- GLuint nr,
- GLuint subnr )
+static inline struct brw_reg brw_uw8_reg(uint32_t file,
+ uint32_t nr,
+ uint32_t subnr)
{
return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
/** Construct unsigned word[1] register */
-static INLINE struct brw_reg brw_uw1_reg( GLuint file,
- GLuint nr,
- GLuint subnr )
+static inline struct brw_reg brw_uw1_reg(uint32_t file,
+ uint32_t nr,
+ uint32_t subnr)
{
return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
-static INLINE struct brw_reg brw_imm_reg( GLuint type )
+static inline struct brw_reg brw_imm_reg(uint32_t type)
{
- return brw_reg( BRW_IMMEDIATE_VALUE,
- 0,
- 0,
- type,
- BRW_VERTICAL_STRIDE_0,
- BRW_WIDTH_1,
- BRW_HORIZONTAL_STRIDE_0,
- 0,
- 0);
+ return brw_reg(BRW_IMMEDIATE_VALUE,
+ 0,
+ 0,
+ type,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ 0,
+ 0);
}
/** Construct float immediate register */
-static INLINE struct brw_reg brw_imm_f( GLfloat f )
+static inline struct brw_reg brw_imm_f(float f)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
imm.dw1.f = f;
}
/** Construct integer immediate register */
-static INLINE struct brw_reg brw_imm_d( GLint d )
+static inline struct brw_reg brw_imm_d(int d)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
imm.dw1.d = d;
}
/** Construct uint immediate register */
-static INLINE struct brw_reg brw_imm_ud( GLuint ud )
+static inline struct brw_reg brw_imm_ud(uint32_t ud)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
imm.dw1.ud = ud;
}
/** Construct ushort immediate register */
-static INLINE struct brw_reg brw_imm_uw( GLushort uw )
+static inline struct brw_reg brw_imm_uw(uint16_t uw)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
imm.dw1.ud = uw | (uw << 16);
}
/** Construct short immediate register */
-static INLINE struct brw_reg brw_imm_w( GLshort w )
+static inline struct brw_reg brw_imm_w(short w)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
imm.dw1.d = w | (w << 16);
*/
/** Construct vector of eight signed half-byte values */
-static INLINE struct brw_reg brw_imm_v( GLuint v )
+static inline struct brw_reg brw_imm_v(uint32_t v)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
imm.vstride = BRW_VERTICAL_STRIDE_0;
}
/** Construct vector of four 8-bit float values */
-static INLINE struct brw_reg brw_imm_vf( GLuint v )
+static inline struct brw_reg brw_imm_vf(uint32_t v)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
imm.vstride = BRW_VERTICAL_STRIDE_0;
#define VF_ONE 0x30
#define VF_NEG (1<<7)
-static INLINE struct brw_reg brw_imm_vf4( GLuint v0,
- GLuint v1,
- GLuint v2,
- GLuint v3)
+static inline struct brw_reg brw_imm_vf4(uint32_t v0,
+ uint32_t v1,
+ uint32_t v2,
+ uint32_t v3)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_4;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.dw1.ud = ((v0 << 0) |
- (v1 << 8) |
- (v2 << 16) |
- (v3 << 24));
+ (v1 << 8) |
+ (v2 << 16) |
+ (v3 << 24));
return imm;
}
-static INLINE struct brw_reg brw_address( struct brw_reg reg )
+static inline struct brw_reg brw_address(struct brw_reg reg)
{
return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
}
/** Construct float[1] general-purpose register */
-static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr )
+static inline struct brw_reg brw_vec1_grf(uint32_t nr, uint32_t subnr)
{
return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
/** Construct float[2] general-purpose register */
-static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr )
+static inline struct brw_reg brw_vec2_grf(uint32_t nr, uint32_t subnr)
{
return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
/** Construct float[4] general-purpose register */
-static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr )
+static inline struct brw_reg brw_vec4_grf(uint32_t nr, uint32_t subnr)
{
return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
/** Construct float[8] general-purpose register */
-static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr )
+static inline struct brw_reg brw_vec8_grf(uint32_t nr, uint32_t subnr)
{
return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
-static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr )
+static inline struct brw_reg brw_uw8_grf(uint32_t nr, uint32_t subnr)
{
return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
-static INLINE struct brw_reg brw_uw16_grf( GLuint nr, GLuint subnr )
+static inline struct brw_reg brw_uw16_grf(uint32_t nr, uint32_t subnr)
{
return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
-
/** Construct null register (usually used for setting condition codes) */
-static INLINE struct brw_reg brw_null_reg( void )
+static inline struct brw_reg brw_null_reg(void)
{
- return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
- BRW_ARF_NULL,
- 0);
+ return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_NULL,
+ 0);
}
-static INLINE struct brw_reg brw_address_reg( GLuint subnr )
+static inline struct brw_reg brw_address_reg(uint32_t subnr)
{
- return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
- BRW_ARF_ADDRESS,
- subnr);
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_ADDRESS,
+ subnr);
}
/* If/else instructions break in align16 mode if writemask & swizzle
* aren't xyzw. This goes against the convention for other scalar
* regs:
*/
-static INLINE struct brw_reg brw_ip_reg( void )
+static inline struct brw_reg brw_ip_reg(void)
{
- return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
- BRW_ARF_IP,
- 0,
- BRW_REGISTER_TYPE_UD,
- BRW_VERTICAL_STRIDE_4, /* ? */
- BRW_WIDTH_1,
- BRW_HORIZONTAL_STRIDE_0,
- BRW_SWIZZLE_XYZW, /* NOTE! */
- WRITEMASK_XYZW); /* NOTE! */
+ return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_IP,
+ 0,
+ BRW_REGISTER_TYPE_UD,
+ BRW_VERTICAL_STRIDE_4, /* ? */
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, /* NOTE! */
+ WRITEMASK_XYZW); /* NOTE! */
}
-static INLINE struct brw_reg brw_acc_reg( void )
+static inline struct brw_reg brw_acc_reg(void)
{
- return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
- BRW_ARF_ACCUMULATOR,
- 0);
+ return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_ACCUMULATOR,
+ 0);
}
-static INLINE struct brw_reg brw_notification_1_reg(void)
+static inline struct brw_reg brw_notification_1_reg(void)
{
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
- BRW_ARF_NOTIFICATION_COUNT,
- 1,
- BRW_REGISTER_TYPE_UD,
- BRW_VERTICAL_STRIDE_0,
- BRW_WIDTH_1,
- BRW_HORIZONTAL_STRIDE_0,
- BRW_SWIZZLE_XXXX,
- WRITEMASK_X);
+ BRW_ARF_NOTIFICATION_COUNT,
+ 1,
+ BRW_REGISTER_TYPE_UD,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XXXX,
+ WRITEMASK_X);
}
-static INLINE struct brw_reg brw_flag_reg( void )
+static inline struct brw_reg brw_flag_reg(void)
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
- BRW_ARF_FLAG,
- 0);
+ BRW_ARF_FLAG,
+ 0);
}
-static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
+static inline struct brw_reg brw_mask_reg(uint32_t subnr)
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
- BRW_ARF_MASK,
- subnr);
+ BRW_ARF_MASK,
+ subnr);
}
-static INLINE struct brw_reg brw_message_reg( GLuint nr )
+static inline struct brw_reg brw_message_reg(uint32_t nr)
{
assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
- nr,
- 0);
+ nr,
+ 0);
}
-
-
-
/* This is almost always called with a numeric constant argument, so
* make things easy to evaluate at compile time:
*/
-static INLINE GLuint cvt( GLuint val )
+static inline uint32_t cvt(uint32_t val)
{
switch (val) {
case 0: return 0;
return 0;
}
-static INLINE struct brw_reg stride( struct brw_reg reg,
- GLuint vstride,
- GLuint width,
- GLuint hstride )
+static inline struct brw_reg stride(struct brw_reg reg,
+ uint32_t vstride,
+ uint32_t width,
+ uint32_t hstride)
{
reg.vstride = cvt(vstride);
reg.width = cvt(width) - 1;
}
-static INLINE struct brw_reg vec16( struct brw_reg reg )
+static inline struct brw_reg vec16(struct brw_reg reg)
{
return stride(reg, 16,16,1);
}
-static INLINE struct brw_reg vec8( struct brw_reg reg )
+static inline struct brw_reg vec8(struct brw_reg reg)
{
return stride(reg, 8,8,1);
}
-static INLINE struct brw_reg vec4( struct brw_reg reg )
+static inline struct brw_reg vec4(struct brw_reg reg)
{
return stride(reg, 4,4,1);
}
-static INLINE struct brw_reg vec2( struct brw_reg reg )
+static inline struct brw_reg vec2(struct brw_reg reg)
{
return stride(reg, 2,2,1);
}
-static INLINE struct brw_reg vec1( struct brw_reg reg )
+static inline struct brw_reg vec1(struct brw_reg reg)
{
return stride(reg, 0,1,0);
}
-
-static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt )
+static inline struct brw_reg get_element(struct brw_reg reg, uint32_t elt)
{
return vec1(suboffset(reg, elt));
}
-static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt )
+static inline struct brw_reg get_element_ud(struct brw_reg reg, uint32_t elt)
{
return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
}
-static INLINE struct brw_reg get_element_d( struct brw_reg reg, GLuint elt )
+static inline struct brw_reg get_element_d(struct brw_reg reg, uint32_t elt)
{
return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_D), elt));
}
-
-static INLINE struct brw_reg brw_swizzle( struct brw_reg reg,
- GLuint x,
- GLuint y,
- GLuint z,
- GLuint w)
+static inline struct brw_reg brw_swizzle(struct brw_reg reg,
+ uint32_t x,
+ uint32_t y,
+ uint32_t z,
+ uint32_t w)
{
assert(reg.file != BRW_IMMEDIATE_VALUE);
reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
- BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
- BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
- BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
return reg;
}
-static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg,
- GLuint x )
+static inline struct brw_reg brw_swizzle1(struct brw_reg reg,
+ uint32_t x)
{
return brw_swizzle(reg, x, x, x, x);
}
-static INLINE struct brw_reg brw_writemask( struct brw_reg reg,
- GLuint mask )
+static inline struct brw_reg brw_writemask(struct brw_reg reg,
+ uint32_t mask)
{
assert(reg.file != BRW_IMMEDIATE_VALUE);
reg.dw1.bits.writemask &= mask;
return reg;
}
-static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg,
- GLuint mask )
+static inline struct brw_reg brw_set_writemask(struct brw_reg reg, uint32_t mask)
{
assert(reg.file != BRW_IMMEDIATE_VALUE);
reg.dw1.bits.writemask = mask;
return reg;
}
-static INLINE struct brw_reg negate( struct brw_reg reg )
+static inline struct brw_reg negate(struct brw_reg reg)
{
reg.negate ^= 1;
return reg;
}
-static INLINE struct brw_reg brw_abs( struct brw_reg reg )
+static inline struct brw_reg brw_abs(struct brw_reg reg)
{
reg.abs = 1;
reg.negate = 0;
return reg;
}
-/***********************************************************************
- */
-static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr,
- GLint offset )
+static inline struct brw_reg brw_vec4_indirect(uint32_t subnr,
+ int offset)
{
struct brw_reg reg = brw_vec4_grf(0, 0);
reg.subnr = subnr;
return reg;
}
-static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr,
- GLint offset )
+static inline struct brw_reg brw_vec1_indirect(uint32_t subnr, int offset)
{
struct brw_reg reg = brw_vec1_grf(0, 0);
reg.subnr = subnr;
return reg;
}
-static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset)
+static inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset)
{
return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
}
-static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset)
+static inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset)
{
return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
}
-static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset)
+static inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset)
{
return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
}
-static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset)
+static inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset)
{
return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
}
-static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset)
+static inline struct brw_reg deref_1d(struct brw_indirect ptr, int offset)
{
return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
}
-static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset)
+static inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset)
{
return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
}
-static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr)
+static inline struct brw_reg get_addr_reg(struct brw_indirect ptr)
{
return brw_address_reg(ptr.addr_subnr);
}
-static INLINE struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset )
+static inline struct brw_indirect brw_indirect_offset(struct brw_indirect ptr, int offset)
{
ptr.addr_offset += offset;
return ptr;
}
-static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset )
+static inline struct brw_indirect brw_indirect(uint32_t addr_subnr, int offset)
{
struct brw_indirect ptr;
ptr.addr_subnr = addr_subnr;
}
/** Do two brw_regs refer to the same register? */
-static INLINE bool
+static inline bool
brw_same_reg(struct brw_reg r1, struct brw_reg r2)
{
return r1.file == r2.file && r1.nr == r2.nr;
}
-static INLINE struct brw_instruction *current_insn( struct brw_compile *p)
+static inline struct brw_instruction *current_insn(struct brw_compile *p)
{
return &p->store[p->nr_insn];
}
-void brw_pop_insn_state( struct brw_compile *p );
-void brw_push_insn_state( struct brw_compile *p );
-void brw_set_mask_control( struct brw_compile *p, GLuint value );
-void brw_set_saturate( struct brw_compile *p, GLuint value );
-void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
+void brw_pop_insn_state(struct brw_compile *p);
+void brw_push_insn_state(struct brw_compile *p);
+void brw_set_mask_control(struct brw_compile *p, uint32_t value);
+void brw_set_saturate(struct brw_compile *p, uint32_t value);
+void brw_set_access_mode(struct brw_compile *p, uint32_t access_mode);
void brw_set_compression_control(struct brw_compile *p, enum brw_compression c);
-void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
-void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
+void brw_set_predicate_control_flag_value(struct brw_compile *p, uint32_t value);
+void brw_set_predicate_control(struct brw_compile *p, uint32_t pc);
void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse);
-void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
-void brw_set_acc_write_control(struct brw_compile *p, GLuint value);
+void brw_set_conditionalmod(struct brw_compile *p, uint32_t conditional);
+void brw_set_acc_write_control(struct brw_compile *p, uint32_t value);
void brw_init_compile(struct brw_context *, struct brw_compile *p,
- void *mem_ctx);
-const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
+ void *mem_ctx);
+const uint32_t *brw_get_program(struct brw_compile *p, uint32_t *sz);
-struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode);
+struct brw_instruction *brw_next_insn(struct brw_compile *p, uint32_t opcode);
void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
- struct brw_reg dest);
+ struct brw_reg dest);
void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
- struct brw_reg reg);
+ struct brw_reg reg);
void gen6_resolve_implied_move(struct brw_compile *p,
- struct brw_reg *src,
- GLuint msg_reg_nr);
+ struct brw_reg *src,
+ uint32_t msg_reg_nr);
/* Helpers for regular instructions:
*/
-#define ALU1(OP) \
-struct brw_instruction *brw_##OP(struct brw_compile *p, \
- struct brw_reg dest, \
- struct brw_reg src0);
-
-#define ALU2(OP) \
-struct brw_instruction *brw_##OP(struct brw_compile *p, \
- struct brw_reg dest, \
- struct brw_reg src0, \
- struct brw_reg src1);
-
-#define ALU3(OP) \
-struct brw_instruction *brw_##OP(struct brw_compile *p, \
- struct brw_reg dest, \
- struct brw_reg src0, \
- struct brw_reg src1, \
- struct brw_reg src2);
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0);
+
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1);
+
+#define ALU3(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1, \
+ struct brw_reg src2);
#define ROUND(OP) \
void brw_##OP(struct brw_compile *p, struct brw_reg dest, struct brw_reg src0);
#undef ROUND
-/* Helpers for SEND instruction:
- */
+/* Helpers for SEND instruction */
void brw_set_sampler_message(struct brw_compile *p,
struct brw_instruction *insn,
- GLuint binding_table_index,
- GLuint sampler,
- GLuint msg_type,
- GLuint response_length,
- GLuint msg_length,
- GLuint header_present,
- GLuint simd_mode,
- GLuint return_format);
+ uint32_t binding_table_index,
+ uint32_t sampler,
+ uint32_t msg_type,
+ uint32_t response_length,
+ uint32_t msg_length,
+ uint32_t header_present,
+ uint32_t simd_mode,
+ uint32_t return_format);
void brw_set_dp_read_message(struct brw_compile *p,
- struct brw_instruction *insn,
- GLuint binding_table_index,
- GLuint msg_control,
- GLuint msg_type,
- GLuint target_cache,
- GLuint msg_length,
- GLuint response_length);
+ struct brw_instruction *insn,
+ uint32_t binding_table_index,
+ uint32_t msg_control,
+ uint32_t msg_type,
+ uint32_t target_cache,
+ uint32_t msg_length,
+ uint32_t response_length);
void brw_set_dp_write_message(struct brw_compile *p,
- struct brw_instruction *insn,
- GLuint binding_table_index,
- GLuint msg_control,
- GLuint msg_type,
- GLuint msg_length,
- bool header_present,
- GLuint last_render_target,
- GLuint response_length,
- GLuint end_of_thread,
- GLuint send_commit_msg);
-
-void brw_urb_WRITE(struct brw_compile *p,
- struct brw_reg dest,
- GLuint msg_reg_nr,
- struct brw_reg src0,
- bool allocate,
- bool used,
- GLuint msg_length,
- GLuint response_length,
- bool eot,
- bool writes_complete,
- GLuint offset,
- GLuint swizzle);
-
-void brw_ff_sync(struct brw_compile *p,
- struct brw_reg dest,
- GLuint msg_reg_nr,
- struct brw_reg src0,
- bool allocate,
- GLuint response_length,
- bool eot);
-
-void brw_svb_write(struct brw_compile *p,
- struct brw_reg dest,
- GLuint msg_reg_nr,
- struct brw_reg src0,
- GLuint binding_table_index,
- bool send_commit_msg);
-
-void brw_fb_WRITE(struct brw_compile *p,
- int dispatch_width,
- GLuint msg_reg_nr,
- struct brw_reg src0,
- GLuint binding_table_index,
- GLuint msg_length,
- GLuint response_length,
- bool eot,
- bool header_present);
+ struct brw_instruction *insn,
+ uint32_t binding_table_index,
+ uint32_t msg_control,
+ uint32_t msg_type,
+ uint32_t msg_length,
+ bool header_present,
+ uint32_t last_render_target,
+ uint32_t response_length,
+ uint32_t end_of_thread,
+ uint32_t send_commit_msg);
void brw_SAMPLE(struct brw_compile *p,
- struct brw_reg dest,
- GLuint msg_reg_nr,
- struct brw_reg src0,
- GLuint binding_table_index,
- GLuint sampler,
- GLuint writemask,
- GLuint msg_type,
- GLuint response_length,
- GLuint msg_length,
- GLuint header_present,
- GLuint simd_mode,
- GLuint return_format);
-
-void brw_math_16( struct brw_compile *p,
- struct brw_reg dest,
- GLuint function,
- GLuint saturate,
- GLuint msg_reg_nr,
- struct brw_reg src,
- GLuint precision );
-
-void brw_math( struct brw_compile *p,
- struct brw_reg dest,
- GLuint function,
- GLuint saturate,
- GLuint msg_reg_nr,
- struct brw_reg src,
- GLuint data_type,
- GLuint precision );
+ struct brw_reg dest,
+ uint32_t msg_reg_nr,
+ struct brw_reg src0,
+ uint32_t binding_table_index,
+ uint32_t sampler,
+ uint32_t writemask,
+ uint32_t msg_type,
+ uint32_t response_length,
+ uint32_t msg_length,
+ uint32_t header_present,
+ uint32_t simd_mode,
+ uint32_t return_format);
+
+void brw_math_16(struct brw_compile *p,
+ struct brw_reg dest,
+ uint32_t function,
+ uint32_t saturate,
+ uint32_t msg_reg_nr,
+ struct brw_reg src,
+ uint32_t precision);
+
+void brw_math(struct brw_compile *p,
+ struct brw_reg dest,
+ uint32_t function,
+ uint32_t saturate,
+ uint32_t msg_reg_nr,
+ struct brw_reg src,
+ uint32_t data_type,
+ uint32_t precision);
void brw_math2(struct brw_compile *p,
- struct brw_reg dest,
- GLuint function,
- struct brw_reg src0,
- struct brw_reg src1);
+ struct brw_reg dest,
+ uint32_t function,
+ struct brw_reg src0,
+ struct brw_reg src1);
void brw_oword_block_read(struct brw_compile *p,
- struct brw_reg dest,
- struct brw_reg mrf,
- uint32_t offset,
- uint32_t bind_table_index);
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ uint32_t offset,
+ uint32_t bind_table_index);
void brw_oword_block_read_scratch(struct brw_compile *p,
- struct brw_reg dest,
- struct brw_reg mrf,
- int num_regs,
- GLuint offset);
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ int num_regs,
+ uint32_t offset);
void brw_oword_block_write_scratch(struct brw_compile *p,
- struct brw_reg mrf,
- int num_regs,
- GLuint offset);
+ struct brw_reg mrf,
+ int num_regs,
+ uint32_t offset);
void brw_dword_scattered_read(struct brw_compile *p,
- struct brw_reg dest,
- struct brw_reg mrf,
- uint32_t bind_table_index);
-
-void brw_dp_READ_4_vs( struct brw_compile *p,
- struct brw_reg dest,
- GLuint location,
- GLuint bind_table_index );
-
-void brw_dp_READ_4_vs_relative(struct brw_compile *p,
- struct brw_reg dest,
- struct brw_reg addrReg,
- GLuint offset,
- GLuint bind_table_index);
-
-/* If/else/endif. Works by manipulating the execution flags on each
- * channel.
- */
-struct brw_instruction *brw_IF(struct brw_compile *p,
- GLuint execute_size);
-struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
- struct brw_reg src0, struct brw_reg src1);
-
-void brw_ELSE(struct brw_compile *p);
-void brw_ENDIF(struct brw_compile *p);
-
-/* DO/WHILE loops:
- */
-struct brw_instruction *brw_DO(struct brw_compile *p,
- GLuint execute_size);
-
-struct brw_instruction *brw_WHILE(struct brw_compile *p);
-
-struct brw_instruction *brw_BREAK(struct brw_compile *p);
-struct brw_instruction *brw_CONT(struct brw_compile *p);
-struct brw_instruction *gen6_CONT(struct brw_compile *p);
-struct brw_instruction *gen6_HALT(struct brw_compile *p);
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ uint32_t bind_table_index);
/* Forward jumps:
*/
void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx);
-
-
void brw_NOP(struct brw_compile *p);
void brw_WAIT(struct brw_compile *p);
* taken from src0:
*/
void brw_CMP(struct brw_compile *p,
- struct brw_reg dest,
- GLuint conditional,
- struct brw_reg src0,
- struct brw_reg src1);
+ struct brw_reg dest,
+ uint32_t conditional,
+ struct brw_reg src0,
+ struct brw_reg src1);
-void brw_print_reg( struct brw_reg reg );
-
-
-/***********************************************************************
- * brw_eu_util.c:
- */
+void brw_print_reg(struct brw_reg reg);
void brw_copy_indirect_to_indirect(struct brw_compile *p,
- struct brw_indirect dst_ptr,
- struct brw_indirect src_ptr,
- GLuint count);
+ struct brw_indirect dst_ptr,
+ struct brw_indirect src_ptr,
+ uint32_t count);
void brw_copy_from_indirect(struct brw_compile *p,
- struct brw_reg dst,
- struct brw_indirect ptr,
- GLuint count);
+ struct brw_reg dst,
+ struct brw_indirect ptr,
+ uint32_t count);
void brw_copy4(struct brw_compile *p,
- struct brw_reg dst,
- struct brw_reg src,
- GLuint count);
+ struct brw_reg dst,
+ struct brw_reg src,
+ uint32_t count);
void brw_copy8(struct brw_compile *p,
- struct brw_reg dst,
- struct brw_reg src,
- GLuint count);
+ struct brw_reg dst,
+ struct brw_reg src,
+ uint32_t count);
-void brw_math_invert( struct brw_compile *p,
- struct brw_reg dst,
- struct brw_reg src);
+void brw_math_invert(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src);
void brw_set_src1(struct brw_compile *p,
- struct brw_instruction *insn,
- struct brw_reg reg);
+ struct brw_instruction *insn,
+ struct brw_reg reg);
void brw_set_uip_jip(struct brw_compile *p);
uint32_t brw_swap_cmod(uint32_t cmod);
-/* brw_optimize.c */
-void brw_optimize(struct brw_compile *p);
-void brw_remove_duplicate_mrf_moves(struct brw_compile *p);
-void brw_remove_grf_to_mrf_moves(struct brw_compile *p);
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* BRW_EU_H */
-#endif
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
-
-#include "brw_context.h"
+// #include "brw_context.h"
#include "brw_defines.h"
#include "brw_eu.h"
-#include "glsl/ralloc.h"
+#include <string.h>
+
+#define Elements(x) (sizeof(x) / sizeof(*(x)))
/***********************************************************************
* Internal helper for constructing instructions
*/
static void guess_execution_size(struct brw_compile *p,
- struct brw_instruction *insn,
- struct brw_reg reg)
+ struct brw_instruction *insn,
+ struct brw_reg reg)
{
if (reg.width == BRW_WIDTH_8 && p->compressed)
insn->header.execution_size = BRW_EXECUTE_16;
else
- insn->header.execution_size = reg.width; /* note - definitions are compatible */
+ insn->header.execution_size = reg.width; /* note - definitions are compatible */
}
*/
void
gen6_resolve_implied_move(struct brw_compile *p,
- struct brw_reg *src,
- GLuint msg_reg_nr)
+ struct brw_reg *src,
+ uint32_t msg_reg_nr)
{
- struct intel_context *intel = &p->brw->intel;
- if (intel->gen < 6)
+ if (p->gen < 6)
return;
if (src->file == BRW_MESSAGE_REGISTER_FILE)
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
- retype(*src, BRW_REGISTER_TYPE_UD));
+ retype(*src, BRW_REGISTER_TYPE_UD));
brw_pop_insn_state(p);
}
*src = brw_message_reg(msg_reg_nr);
* Since we're pretending to have 16 MRFs anyway, we may as well use the
* registers required for messages with EOT.
*/
- struct intel_context *intel = &p->brw->intel;
- if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
+ if (p->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
reg->file = BRW_GENERAL_REGISTER_FILE;
reg->nr += GEN7_MRF_HACK_START;
}
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
- struct brw_reg dest)
+ struct brw_reg dest)
{
if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
dest.file != BRW_MESSAGE_REGISTER_FILE)
insn->bits1.da1.dest_reg_nr = dest.nr;
if (insn->header.access_mode == BRW_ALIGN_1) {
- insn->bits1.da1.dest_subreg_nr = dest.subnr;
- if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
- dest.hstride = BRW_HORIZONTAL_STRIDE_1;
- insn->bits1.da1.dest_horiz_stride = dest.hstride;
+ insn->bits1.da1.dest_subreg_nr = dest.subnr;
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ insn->bits1.da1.dest_horiz_stride = dest.hstride;
}
else {
- insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
- insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
- /* even ignored in da16, still need to set as '01' */
- insn->bits1.da16.dest_horiz_stride = 1;
+ insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
+ insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+ /* even ignored in da16, still need to set as '01' */
+ insn->bits1.da16.dest_horiz_stride = 1;
}
}
else {
/* These are different sizes in align1 vs align16:
*/
if (insn->header.access_mode == BRW_ALIGN_1) {
- insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
- if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
- dest.hstride = BRW_HORIZONTAL_STRIDE_1;
- insn->bits1.ia1.dest_horiz_stride = dest.hstride;
+ insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ insn->bits1.ia1.dest_horiz_stride = dest.hstride;
}
else {
- insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
- /* even ignored in da16, still need to set as '01' */
- insn->bits1.ia16.dest_horiz_stride = 1;
+ insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+ /* even ignored in da16, still need to set as '01' */
+ insn->bits1.ia16.dest_horiz_stride = 1;
}
}
* destination horiz stride has to be a word.
*/
if (reg.type == BRW_REGISTER_TYPE_V) {
- assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
- reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
+ assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
+ reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
}
return;
width = width_for_reg[reg.width];
assert(insn->header.execution_size >= 0 &&
- insn->header.execution_size < Elements(execsize_for_reg));
+ insn->header.execution_size < Elements(execsize_for_reg));
execsize = execsize_for_reg[insn->header.execution_size];
/* Restrictions from 3.3.10: Register Region Restrictions. */
void
brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
- struct brw_reg reg)
+ struct brw_reg reg)
{
if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
else
{
if (reg.address_mode == BRW_ADDRESS_DIRECT) {
- if (insn->header.access_mode == BRW_ALIGN_1) {
- insn->bits2.da1.src0_subreg_nr = reg.subnr;
- insn->bits2.da1.src0_reg_nr = reg.nr;
- }
- else {
- insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
- insn->bits2.da16.src0_reg_nr = reg.nr;
- }
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits2.da1.src0_subreg_nr = reg.subnr;
+ insn->bits2.da1.src0_reg_nr = reg.nr;
+ }
+ else {
+ insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
+ insn->bits2.da16.src0_reg_nr = reg.nr;
+ }
}
else {
- insn->bits2.ia1.src0_subreg_nr = reg.subnr;
-
- if (insn->header.access_mode == BRW_ALIGN_1) {
- insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
- }
- else {
- insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
- }
+ insn->bits2.ia1.src0_subreg_nr = reg.subnr;
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
+ }
+ else {
+ insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
+ }
}
if (insn->header.access_mode == BRW_ALIGN_1) {
- if (reg.width == BRW_WIDTH_1 &&
- insn->header.execution_size == BRW_EXECUTE_1) {
- insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
- insn->bits2.da1.src0_width = BRW_WIDTH_1;
- insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
- }
- else {
- insn->bits2.da1.src0_horiz_stride = reg.hstride;
- insn->bits2.da1.src0_width = reg.width;
- insn->bits2.da1.src0_vert_stride = reg.vstride;
- }
+ if (reg.width == BRW_WIDTH_1 &&
+ insn->header.execution_size == BRW_EXECUTE_1) {
+ insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+ insn->bits2.da1.src0_width = BRW_WIDTH_1;
+ insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
+ }
+ else {
+ insn->bits2.da1.src0_horiz_stride = reg.hstride;
+ insn->bits2.da1.src0_width = reg.width;
+ insn->bits2.da1.src0_vert_stride = reg.vstride;
+ }
}
else {
- insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
- insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
- insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
- insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
-
- /* This is an oddity of the fact we're using the same
- * descriptions for registers in align_16 as align_1:
- */
- if (reg.vstride == BRW_VERTICAL_STRIDE_8)
- insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
- else
- insn->bits2.da16.src0_vert_stride = reg.vstride;
+ insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+ insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+ insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+ insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+ /* This is an oddity of the fact we're using the same
+ * descriptions for registers in align_16 as align_1:
+ */
+ if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+ insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
+ else
+ insn->bits2.da16.src0_vert_stride = reg.vstride;
}
}
}
void brw_set_src1(struct brw_compile *p,
- struct brw_instruction *insn,
- struct brw_reg reg)
+ struct brw_instruction *insn,
+ struct brw_reg reg)
{
assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
/* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
if (insn->header.access_mode == BRW_ALIGN_1) {
- insn->bits3.da1.src1_subreg_nr = reg.subnr;
- insn->bits3.da1.src1_reg_nr = reg.nr;
+ insn->bits3.da1.src1_subreg_nr = reg.subnr;
+ insn->bits3.da1.src1_reg_nr = reg.nr;
}
else {
- insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
- insn->bits3.da16.src1_reg_nr = reg.nr;
+ insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
+ insn->bits3.da16.src1_reg_nr = reg.nr;
}
if (insn->header.access_mode == BRW_ALIGN_1) {
- if (reg.width == BRW_WIDTH_1 &&
- insn->header.execution_size == BRW_EXECUTE_1) {
- insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
- insn->bits3.da1.src1_width = BRW_WIDTH_1;
- insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
- }
- else {
- insn->bits3.da1.src1_horiz_stride = reg.hstride;
- insn->bits3.da1.src1_width = reg.width;
- insn->bits3.da1.src1_vert_stride = reg.vstride;
- }
+ if (reg.width == BRW_WIDTH_1 &&
+ insn->header.execution_size == BRW_EXECUTE_1) {
+ insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+ insn->bits3.da1.src1_width = BRW_WIDTH_1;
+ insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
+ }
+ else {
+ insn->bits3.da1.src1_horiz_stride = reg.hstride;
+ insn->bits3.da1.src1_width = reg.width;
+ insn->bits3.da1.src1_vert_stride = reg.vstride;
+ }
}
else {
- insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
- insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
- insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
- insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
-
- /* This is an oddity of the fact we're using the same
- * descriptions for registers in align_16 as align_1:
- */
- if (reg.vstride == BRW_VERTICAL_STRIDE_8)
- insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
- else
- insn->bits3.da16.src1_vert_stride = reg.vstride;
+ insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+ insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+ insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+ insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+ /* This is an oddity of the fact we're using the same
+ * descriptions for registers in align_16 as align_1:
+ */
+ if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+ insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
+ else
+ insn->bits3.da16.src1_vert_stride = reg.vstride;
}
}
}
*/
static void
brw_set_message_descriptor(struct brw_compile *p,
- struct brw_instruction *inst,
- enum brw_message_target sfid,
- unsigned msg_length,
- unsigned response_length,
- bool header_present,
- bool end_of_thread)
+ struct brw_instruction *inst,
+ enum brw_message_target sfid,
+ unsigned msg_length,
+ unsigned response_length,
+ bool header_present,
+ bool end_of_thread)
{
- struct intel_context *intel = &p->brw->intel;
-
brw_set_src1(p, inst, brw_imm_d(0));
- if (intel->gen >= 5) {
+ if (p->gen >= 5) {
inst->bits3.generic_gen5.header_present = header_present;
inst->bits3.generic_gen5.response_length = response_length;
inst->bits3.generic_gen5.msg_length = msg_length;
inst->bits3.generic_gen5.end_of_thread = end_of_thread;
- if (intel->gen >= 6) {
- /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
- inst->header.destreg__conditionalmod = sfid;
+ if (p->gen >= 6) {
+ /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
+ inst->header.destreg__conditionalmod = sfid;
} else {
- /* Set Extended Message Descriptor (ex_desc) */
- inst->bits2.send_gen5.sfid = sfid;
- inst->bits2.send_gen5.end_of_thread = end_of_thread;
+ /* Set Extended Message Descriptor (ex_desc) */
+ inst->bits2.send_gen5.sfid = sfid;
+ inst->bits2.send_gen5.end_of_thread = end_of_thread;
}
} else {
inst->bits3.generic.response_length = response_length;
}
}
-static void brw_set_math_message( struct brw_compile *p,
- struct brw_instruction *insn,
- GLuint function,
- GLuint integer_type,
- bool low_precision,
- bool saturate,
- GLuint dataType )
+static void brw_set_math_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ uint32_t function,
+ uint32_t integer_type,
+ bool low_precision,
+ bool saturate,
+ uint32_t dataType)
{
- struct brw_context *brw = p->brw;
- struct intel_context *intel = &brw->intel;
unsigned msg_length;
unsigned response_length;
}
brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
- msg_length, response_length, false, false);
- if (intel->gen == 5) {
+ msg_length, response_length, false, false);
+ if (p->gen == 5) {
insn->bits3.math_gen5.function = function;
insn->bits3.math_gen5.int_type = integer_type;
insn->bits3.math_gen5.precision = low_precision;
}
}
-
-static void brw_set_ff_sync_message(struct brw_compile *p,
- struct brw_instruction *insn,
- bool allocate,
- GLuint response_length,
- bool end_of_thread)
-{
- brw_set_message_descriptor(p, insn, BRW_SFID_URB,
- 1, response_length, true, end_of_thread);
- insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
- insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
- insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
- insn->bits3.urb_gen5.allocate = allocate;
- insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
- insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
-}
-
-static void brw_set_urb_message( struct brw_compile *p,
- struct brw_instruction *insn,
- bool allocate,
- bool used,
- GLuint msg_length,
- GLuint response_length,
- bool end_of_thread,
- bool complete,
- GLuint offset,
- GLuint swizzle_control )
-{
- struct brw_context *brw = p->brw;
- struct intel_context *intel = &brw->intel;
-
- brw_set_message_descriptor(p, insn, BRW_SFID_URB,
- msg_length, response_length, true, end_of_thread);
- if (intel->gen == 7) {
- insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */
- insn->bits3.urb_gen7.offset = offset;
- assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
- insn->bits3.urb_gen7.swizzle_control = swizzle_control;
- /* per_slot_offset = 0 makes it ignore offsets in message header */
- insn->bits3.urb_gen7.per_slot_offset = 0;
- insn->bits3.urb_gen7.complete = complete;
- } else if (intel->gen >= 5) {
- insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
- insn->bits3.urb_gen5.offset = offset;
- insn->bits3.urb_gen5.swizzle_control = swizzle_control;
- insn->bits3.urb_gen5.allocate = allocate;
- insn->bits3.urb_gen5.used = used; /* ? */
- insn->bits3.urb_gen5.complete = complete;
- } else {
- insn->bits3.urb.opcode = 0; /* ? */
- insn->bits3.urb.offset = offset;
- insn->bits3.urb.swizzle_control = swizzle_control;
- insn->bits3.urb.allocate = allocate;
- insn->bits3.urb.used = used; /* ? */
- insn->bits3.urb.complete = complete;
- }
-}
-
void
brw_set_dp_write_message(struct brw_compile *p,
- struct brw_instruction *insn,
- GLuint binding_table_index,
- GLuint msg_control,
- GLuint msg_type,
- GLuint msg_length,
- bool header_present,
- GLuint last_render_target,
- GLuint response_length,
- GLuint end_of_thread,
- GLuint send_commit_msg)
+ struct brw_instruction *insn,
+ uint32_t binding_table_index,
+ uint32_t msg_control,
+ uint32_t msg_type,
+ uint32_t msg_length,
+ bool header_present,
+ uint32_t last_render_target,
+ uint32_t response_length,
+ uint32_t end_of_thread,
+ uint32_t send_commit_msg)
{
- struct brw_context *brw = p->brw;
- struct intel_context *intel = &brw->intel;
unsigned sfid;
- if (intel->gen >= 7) {
- /* Use the Render Cache for RT writes; otherwise use the Data Cache */
- if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
- sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
- else
- sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
- } else if (intel->gen == 6) {
- /* Use the render cache for all write messages. */
- sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
- } else {
- sfid = BRW_SFID_DATAPORT_WRITE;
- }
-
+ /* Use the Render Cache for RT writes; otherwise use the Data Cache */
+ if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
+ sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
+ else
+ sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
- header_present, end_of_thread);
-
- if (intel->gen >= 7) {
- insn->bits3.gen7_dp.binding_table_index = binding_table_index;
- insn->bits3.gen7_dp.msg_control = msg_control;
- insn->bits3.gen7_dp.last_render_target = last_render_target;
- insn->bits3.gen7_dp.msg_type = msg_type;
- } else if (intel->gen == 6) {
- insn->bits3.gen6_dp.binding_table_index = binding_table_index;
- insn->bits3.gen6_dp.msg_control = msg_control;
- insn->bits3.gen6_dp.last_render_target = last_render_target;
- insn->bits3.gen6_dp.msg_type = msg_type;
- insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
- } else if (intel->gen == 5) {
- insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
- insn->bits3.dp_write_gen5.msg_control = msg_control;
- insn->bits3.dp_write_gen5.last_render_target = last_render_target;
- insn->bits3.dp_write_gen5.msg_type = msg_type;
- insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
- } else {
- insn->bits3.dp_write.binding_table_index = binding_table_index;
- insn->bits3.dp_write.msg_control = msg_control;
- insn->bits3.dp_write.last_render_target = last_render_target;
- insn->bits3.dp_write.msg_type = msg_type;
- insn->bits3.dp_write.send_commit_msg = send_commit_msg;
- }
+ header_present, end_of_thread);
+
+ insn->bits3.gen7_dp.binding_table_index = binding_table_index;
+ insn->bits3.gen7_dp.msg_control = msg_control;
+ insn->bits3.gen7_dp.last_render_target = last_render_target;
+ insn->bits3.gen7_dp.msg_type = msg_type;
}
void
brw_set_dp_read_message(struct brw_compile *p,
- struct brw_instruction *insn,
- GLuint binding_table_index,
- GLuint msg_control,
- GLuint msg_type,
- GLuint target_cache,
- GLuint msg_length,
- GLuint response_length)
+ struct brw_instruction *insn,
+ uint32_t binding_table_index,
+ uint32_t msg_control,
+ uint32_t msg_type,
+ uint32_t target_cache,
+ uint32_t msg_length,
+ uint32_t response_length)
{
- struct brw_context *brw = p->brw;
- struct intel_context *intel = &brw->intel;
unsigned sfid;
- if (intel->gen >= 7) {
- sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
- } else if (intel->gen == 6) {
- if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
- sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
- else
- sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
- } else {
- sfid = BRW_SFID_DATAPORT_READ;
- }
-
+ sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
- true, false);
-
- if (intel->gen >= 7) {
- insn->bits3.gen7_dp.binding_table_index = binding_table_index;
- insn->bits3.gen7_dp.msg_control = msg_control;
- insn->bits3.gen7_dp.last_render_target = 0;
- insn->bits3.gen7_dp.msg_type = msg_type;
- } else if (intel->gen == 6) {
- insn->bits3.gen6_dp.binding_table_index = binding_table_index;
- insn->bits3.gen6_dp.msg_control = msg_control;
- insn->bits3.gen6_dp.last_render_target = 0;
- insn->bits3.gen6_dp.msg_type = msg_type;
- insn->bits3.gen6_dp.send_commit_msg = 0;
- } else if (intel->gen == 5) {
- insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
- insn->bits3.dp_read_gen5.msg_control = msg_control;
- insn->bits3.dp_read_gen5.msg_type = msg_type;
- insn->bits3.dp_read_gen5.target_cache = target_cache;
- } else if (intel->is_g4x) {
- insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
- insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
- insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
- insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
- } else {
- insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
- insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
- insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
- insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
- }
+ true, false);
+
+ insn->bits3.gen7_dp.binding_table_index = binding_table_index;
+ insn->bits3.gen7_dp.msg_control = msg_control;
+ insn->bits3.gen7_dp.last_render_target = 0;
+ insn->bits3.gen7_dp.msg_type = msg_type;
}
void
brw_set_sampler_message(struct brw_compile *p,
struct brw_instruction *insn,
- GLuint binding_table_index,
- GLuint sampler,
- GLuint msg_type,
- GLuint response_length,
- GLuint msg_length,
- GLuint header_present,
- GLuint simd_mode,
- GLuint return_format)
+ uint32_t binding_table_index,
+ uint32_t sampler,
+ uint32_t msg_type,
+ uint32_t response_length,
+ uint32_t msg_length,
+ uint32_t header_present,
+ uint32_t simd_mode,
+ uint32_t return_format)
{
- struct brw_context *brw = p->brw;
- struct intel_context *intel = &brw->intel;
-
brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
- response_length, header_present, false);
-
- if (intel->gen >= 7) {
- insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
- insn->bits3.sampler_gen7.sampler = sampler;
- insn->bits3.sampler_gen7.msg_type = msg_type;
- insn->bits3.sampler_gen7.simd_mode = simd_mode;
- } else if (intel->gen >= 5) {
- insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
- insn->bits3.sampler_gen5.sampler = sampler;
- insn->bits3.sampler_gen5.msg_type = msg_type;
- insn->bits3.sampler_gen5.simd_mode = simd_mode;
- } else if (intel->is_g4x) {
- insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
- insn->bits3.sampler_g4x.sampler = sampler;
- insn->bits3.sampler_g4x.msg_type = msg_type;
- } else {
- insn->bits3.sampler.binding_table_index = binding_table_index;
- insn->bits3.sampler.sampler = sampler;
- insn->bits3.sampler.msg_type = msg_type;
- insn->bits3.sampler.return_format = return_format;
- }
+ response_length, header_present, false);
+ insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
+ insn->bits3.sampler_gen7.sampler = sampler;
+ insn->bits3.sampler_gen7.msg_type = msg_type;
+ insn->bits3.sampler_gen7.simd_mode = simd_mode;
}
-
#define next_insn brw_next_insn
struct brw_instruction *
-brw_next_insn(struct brw_compile *p, GLuint opcode)
+brw_next_insn(struct brw_compile *p, uint32_t opcode)
{
struct brw_instruction *insn;
-
+ assert(0);
+#if 0
if (p->nr_insn + 1 > p->store_size) {
- if (0)
- printf("incresing the store size to %d\n", p->store_size << 1);
p->store_size <<= 1;
p->store = reralloc(p->mem_ctx, p->store,
struct brw_instruction, p->store_size);
if (!p->store)
assert(!"realloc eu store memeory failed");
}
+#endif
insn = &p->store[p->nr_insn++];
memcpy(insn, p->current, sizeof(*insn));
return insn;
}
-static struct brw_instruction *brw_alu1( struct brw_compile *p,
- GLuint opcode,
- struct brw_reg dest,
- struct brw_reg src )
+static struct brw_instruction *brw_alu1(struct brw_compile *p,
+ uint32_t opcode,
+ struct brw_reg dest,
+ struct brw_reg src)
{
struct brw_instruction *insn = next_insn(p, opcode);
brw_set_dest(p, insn, dest);
}
static struct brw_instruction *brw_alu2(struct brw_compile *p,
- GLuint opcode,
- struct brw_reg dest,
- struct brw_reg src0,
- struct brw_reg src1 )
+ uint32_t opcode,
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
struct brw_instruction *insn = next_insn(p, opcode);
brw_set_dest(p, insn, dest);
}
static struct brw_instruction *brw_alu3(struct brw_compile *p,
- GLuint opcode,
- struct brw_reg dest,
- struct brw_reg src0,
- struct brw_reg src1,
- struct brw_reg src2)
+ uint32_t opcode,
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1,
+ struct brw_reg src2)
{
struct brw_instruction *insn = next_insn(p, opcode);
assert(insn->header.access_mode == BRW_ALIGN_16);
assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
- dest.file == BRW_MESSAGE_REGISTER_FILE);
+ dest.file == BRW_MESSAGE_REGISTER_FILE);
assert(dest.nr < 128);
assert(dest.address_mode == BRW_ADDRESS_DIRECT);
assert(dest.type = BRW_REGISTER_TYPE_F);
/***********************************************************************
* Convenience routines.
*/
-#define ALU1(OP) \
-struct brw_instruction *brw_##OP(struct brw_compile *p, \
- struct brw_reg dest, \
- struct brw_reg src0) \
-{ \
- return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
-}
-
-#define ALU2(OP) \
-struct brw_instruction *brw_##OP(struct brw_compile *p, \
- struct brw_reg dest, \
- struct brw_reg src0, \
- struct brw_reg src1) \
-{ \
- return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
-}
-
-#define ALU3(OP) \
-struct brw_instruction *brw_##OP(struct brw_compile *p, \
- struct brw_reg dest, \
- struct brw_reg src0, \
- struct brw_reg src1, \
- struct brw_reg src2) \
-{ \
- return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0) \
+{ \
+ return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
+}
+
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1) \
+{ \
+ return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
+}
+
+#define ALU3(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1, \
+ struct brw_reg src2) \
+{ \
+ return brw_alu3(p, BRW_OPCODE_##OP, dest, \
+ src0, src1, src2); \
}
/* Rounding operations (other than RNDD) require two instructions - the first
*
* Sandybridge and later appear to round correctly without an ADD.
*/
-#define ROUND(OP) \
-void brw_##OP(struct brw_compile *p, \
- struct brw_reg dest, \
- struct brw_reg src) \
-{ \
- struct brw_instruction *rnd, *add; \
- rnd = next_insn(p, BRW_OPCODE_##OP); \
- brw_set_dest(p, rnd, dest); \
- brw_set_src0(p, rnd, src); \
- \
- if (p->brw->intel.gen < 6) { \
- /* turn on round-increments */ \
- rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
- add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
- add->header.predicate_control = BRW_PREDICATE_NORMAL; \
- } \
+#define ROUND(OP) \
+void brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src) \
+{ \
+ struct brw_instruction *rnd; \
+ rnd = next_insn(p, BRW_OPCODE_##OP); \
+ brw_set_dest(p, rnd, dest); \
+ brw_set_src0(p, rnd, src); \
}
-
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ROUND(RNDZ)
ROUND(RNDE)
-
struct brw_instruction *brw_ADD(struct brw_compile *p,
- struct brw_reg dest,
- struct brw_reg src0,
- struct brw_reg src1)
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* 6.2.2: add */
if (src0.type == BRW_REGISTER_TYPE_F ||
(src0.file == BRW_IMMEDIATE_VALUE &&
- src0.type == BRW_REGISTER_TYPE_VF)) {
+ src0.type == BRW_REGISTER_TYPE_VF)) {
assert(src1.type != BRW_REGISTER_TYPE_UD);
assert(src1.type != BRW_REGISTER_TYPE_D);
}
if (src1.type == BRW_REGISTER_TYPE_F ||
(src1.file == BRW_IMMEDIATE_VALUE &&
- src1.type == BRW_REGISTER_TYPE_VF)) {
+ src1.type == BRW_REGISTER_TYPE_VF)) {
assert(src0.type != BRW_REGISTER_TYPE_UD);
assert(src0.type != BRW_REGISTER_TYPE_D);
}
}
struct brw_instruction *brw_MUL(struct brw_compile *p,
- struct brw_reg dest,
- struct brw_reg src0,
- struct brw_reg src1)
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* 6.32.38: mul */
if (src0.type == BRW_REGISTER_TYPE_D ||
if (src0.type == BRW_REGISTER_TYPE_F ||
(src0.file == BRW_IMMEDIATE_VALUE &&
- src0.type == BRW_REGISTER_TYPE_VF)) {
+ src0.type == BRW_REGISTER_TYPE_VF)) {
assert(src1.type != BRW_REGISTER_TYPE_UD);
assert(src1.type != BRW_REGISTER_TYPE_D);
}
if (src1.type == BRW_REGISTER_TYPE_F ||
(src1.file == BRW_IMMEDIATE_VALUE &&
- src1.type == BRW_REGISTER_TYPE_VF)) {
+ src1.type == BRW_REGISTER_TYPE_VF)) {
assert(src0.type != BRW_REGISTER_TYPE_UD);
assert(src0.type != BRW_REGISTER_TYPE_D);
}
assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
- src0.nr != BRW_ARF_ACCUMULATOR);
+ src0.nr != BRW_ARF_ACCUMULATOR);
assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
- src1.nr != BRW_ARF_ACCUMULATOR);
+ src1.nr != BRW_ARF_ACCUMULATOR);
return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}
brw_set_src1(p, insn, brw_imm_ud(0x0));
}
-
-
-
-
/***********************************************************************
* Comparisons, if/else/endif
*/
return insn;
}
-static void
-push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
-{
- p->if_stack[p->if_stack_depth] = inst - p->store;
-
- p->if_stack_depth++;
- if (p->if_stack_array_size <= p->if_stack_depth) {
- p->if_stack_array_size *= 2;
- p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
- p->if_stack_array_size);
- }
-}
-
-static struct brw_instruction *
-pop_if_stack(struct brw_compile *p)
-{
- p->if_stack_depth--;
- return &p->store[p->if_stack[p->if_stack_depth]];
-}
-
-static void
-push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
-{
- if (p->loop_stack_array_size < p->loop_stack_depth) {
- p->loop_stack_array_size *= 2;
- p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
- p->loop_stack_array_size);
- p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
- p->loop_stack_array_size);
- }
-
- p->loop_stack[p->loop_stack_depth] = inst - p->store;
- p->loop_stack_depth++;
- p->if_depth_in_loop[p->loop_stack_depth] = 0;
-}
-
-static struct brw_instruction *
-get_inner_do_insn(struct brw_compile *p)
-{
- return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
-}
-
-/* EU takes the value from the flag register and pushes it onto some
- * sort of a stack (presumably merging with any flag value already on
- * the stack). Within an if block, the flags at the top of the stack
- * control execution on each channel of the unit, eg. on each of the
- * 16 pixel values in our wm programs.
- *
- * When the matching 'else' instruction is reached (presumably by
- * countdown of the instruction count patched in by our ELSE/ENDIF
- * functions), the relevent flags are inverted.
- *
- * When the matching 'endif' instruction is reached, the flags are
- * popped off. If the stack is now empty, normal execution resumes.
- */
-struct brw_instruction *
-brw_IF(struct brw_compile *p, GLuint execute_size)
-{
- struct intel_context *intel = &p->brw->intel;
- struct brw_instruction *insn;
-
- insn = next_insn(p, BRW_OPCODE_IF);
-
- /* Override the defaults for this instruction:
- */
- if (intel->gen < 6) {
- brw_set_dest(p, insn, brw_ip_reg());
- brw_set_src0(p, insn, brw_ip_reg());
- brw_set_src1(p, insn, brw_imm_d(0x0));
- } else if (intel->gen == 6) {
- brw_set_dest(p, insn, brw_imm_w(0));
- insn->bits1.branch_gen6.jump_count = 0;
- brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
- brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
- } else {
- brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
- brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
- brw_set_src1(p, insn, brw_imm_ud(0));
- insn->bits3.break_cont.jip = 0;
- insn->bits3.break_cont.uip = 0;
- }
-
- insn->header.execution_size = execute_size;
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.predicate_control = BRW_PREDICATE_NORMAL;
- insn->header.mask_control = BRW_MASK_ENABLE;
- if (!p->single_program_flow)
- insn->header.thread_control = BRW_THREAD_SWITCH;
-
- p->current->header.predicate_control = BRW_PREDICATE_NONE;
-
- push_if_stack(p, insn);
- p->if_depth_in_loop[p->loop_stack_depth]++;
- return insn;
-}
-
-/* This function is only used for gen6-style IF instructions with an
- * embedded comparison (conditional modifier). It is not used on gen7.
- */
-struct brw_instruction *
-gen6_IF(struct brw_compile *p, uint32_t conditional,
- struct brw_reg src0, struct brw_reg src1)
-{
- struct brw_instruction *insn;
-
- insn = next_insn(p, BRW_OPCODE_IF);
-
- brw_set_dest(p, insn, brw_imm_w(0));
- if (p->compressed) {
- insn->header.execution_size = BRW_EXECUTE_16;
- } else {
- insn->header.execution_size = BRW_EXECUTE_8;
- }
- insn->bits1.branch_gen6.jump_count = 0;
- brw_set_src0(p, insn, src0);
- brw_set_src1(p, insn, src1);
-
- assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
- assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
- insn->header.destreg__conditionalmod = conditional;
-
- if (!p->single_program_flow)
- insn->header.thread_control = BRW_THREAD_SWITCH;
-
- push_if_stack(p, insn);
- return insn;
-}
-
-/**
- * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
- */
-static void
-convert_IF_ELSE_to_ADD(struct brw_compile *p,
- struct brw_instruction *if_inst,
- struct brw_instruction *else_inst)
-{
- /* The next instruction (where the ENDIF would be, if it existed) */
- struct brw_instruction *next_inst = &p->store[p->nr_insn];
-
- assert(p->single_program_flow);
- assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
- assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
- assert(if_inst->header.execution_size == BRW_EXECUTE_1);
-
- /* Convert IF to an ADD instruction that moves the instruction pointer
- * to the first instruction of the ELSE block. If there is no ELSE
- * block, point to where ENDIF would be. Reverse the predicate.
- *
- * There's no need to execute an ENDIF since we don't need to do any
- * stack operations, and if we're currently executing, we just want to
- * continue normally.
- */
- if_inst->header.opcode = BRW_OPCODE_ADD;
- if_inst->header.predicate_inverse = 1;
-
- if (else_inst != NULL) {
- /* Convert ELSE to an ADD instruction that points where the ENDIF
- * would be.
- */
- else_inst->header.opcode = BRW_OPCODE_ADD;
-
- if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
- else_inst->bits3.ud = (next_inst - else_inst) * 16;
- } else {
- if_inst->bits3.ud = (next_inst - if_inst) * 16;
- }
-}
-
-/**
- * Patch IF and ELSE instructions with appropriate jump targets.
- */
-static void
-patch_IF_ELSE(struct brw_compile *p,
- struct brw_instruction *if_inst,
- struct brw_instruction *else_inst,
- struct brw_instruction *endif_inst)
-{
- struct intel_context *intel = &p->brw->intel;
-
- /* We shouldn't be patching IF and ELSE instructions in single program flow
- * mode when gen < 6, because in single program flow mode on those
- * platforms, we convert flow control instructions to conditional ADDs that
- * operate on IP (see brw_ENDIF).
- *
- * However, on Gen6, writing to IP doesn't work in single program flow mode
- * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
- * not be updated by non-flow control instructions."). And on later
- * platforms, there is no significant benefit to converting control flow
- * instructions to conditional ADDs. So we do patch IF and ELSE
- * instructions in single program flow mode on those platforms.
- */
- if (intel->gen < 6)
- assert(!p->single_program_flow);
-
- assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
- assert(endif_inst != NULL);
- assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
-
- unsigned br = 1;
- /* Jump count is for 64bit data chunk each, so one 128bit instruction
- * requires 2 chunks.
- */
- if (intel->gen >= 5)
- br = 2;
-
- assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
- endif_inst->header.execution_size = if_inst->header.execution_size;
-
- if (else_inst == NULL) {
- /* Patch IF -> ENDIF */
- if (intel->gen < 6) {
- /* Turn it into an IFF, which means no mask stack operations for
- * all-false and jumping past the ENDIF.
- */
- if_inst->header.opcode = BRW_OPCODE_IFF;
- if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
- if_inst->bits3.if_else.pop_count = 0;
- if_inst->bits3.if_else.pad0 = 0;
- } else if (intel->gen == 6) {
- /* As of gen6, there is no IFF and IF must point to the ENDIF. */
- if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
- } else {
- if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
- if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
- }
- } else {
- else_inst->header.execution_size = if_inst->header.execution_size;
-
- /* Patch IF -> ELSE */
- if (intel->gen < 6) {
- if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
- if_inst->bits3.if_else.pop_count = 0;
- if_inst->bits3.if_else.pad0 = 0;
- } else if (intel->gen == 6) {
- if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
- }
-
- /* Patch ELSE -> ENDIF */
- if (intel->gen < 6) {
- /* BRW_OPCODE_ELSE pre-gen6 should point just past the
- * matching ENDIF.
- */
- else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
- else_inst->bits3.if_else.pop_count = 1;
- else_inst->bits3.if_else.pad0 = 0;
- } else if (intel->gen == 6) {
- /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
- else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
- } else {
- /* The IF instruction's JIP should point just past the ELSE */
- if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
- /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
- if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
- else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
- }
- }
-}
-
-void
-brw_ELSE(struct brw_compile *p)
-{
- struct intel_context *intel = &p->brw->intel;
- struct brw_instruction *insn;
-
- insn = next_insn(p, BRW_OPCODE_ELSE);
-
- if (intel->gen < 6) {
- brw_set_dest(p, insn, brw_ip_reg());
- brw_set_src0(p, insn, brw_ip_reg());
- brw_set_src1(p, insn, brw_imm_d(0x0));
- } else if (intel->gen == 6) {
- brw_set_dest(p, insn, brw_imm_w(0));
- insn->bits1.branch_gen6.jump_count = 0;
- brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- } else {
- brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(p, insn, brw_imm_ud(0));
- insn->bits3.break_cont.jip = 0;
- insn->bits3.break_cont.uip = 0;
- }
-
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.mask_control = BRW_MASK_ENABLE;
- if (!p->single_program_flow)
- insn->header.thread_control = BRW_THREAD_SWITCH;
-
- push_if_stack(p, insn);
-}
-
-void
-brw_ENDIF(struct brw_compile *p)
-{
- struct intel_context *intel = &p->brw->intel;
- struct brw_instruction *insn = NULL;
- struct brw_instruction *else_inst = NULL;
- struct brw_instruction *if_inst = NULL;
- struct brw_instruction *tmp;
- bool emit_endif = true;
-
- /* In single program flow mode, we can express IF and ELSE instructions
- * equivalently as ADD instructions that operate on IP. On platforms prior
- * to Gen6, flow control instructions cause an implied thread switch, so
- * this is a significant savings.
- *
- * However, on Gen6, writing to IP doesn't work in single program flow mode
- * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
- * not be updated by non-flow control instructions."). And on later
- * platforms, there is no significant benefit to converting control flow
- * instructions to conditional ADDs. So we only do this trick on Gen4 and
- * Gen5.
- */
- if (intel->gen < 6 && p->single_program_flow)
- emit_endif = false;
-
- /*
- * A single next_insn() may change the base adress of instruction store
- * memory(p->store), so call it first before referencing the instruction
- * store pointer from an index
- */
- if (emit_endif)
- insn = next_insn(p, BRW_OPCODE_ENDIF);
-
- /* Pop the IF and (optional) ELSE instructions from the stack */
- p->if_depth_in_loop[p->loop_stack_depth]--;
- tmp = pop_if_stack(p);
- if (tmp->header.opcode == BRW_OPCODE_ELSE) {
- else_inst = tmp;
- tmp = pop_if_stack(p);
- }
- if_inst = tmp;
-
- if (!emit_endif) {
- /* ENDIF is useless; don't bother emitting it. */
- convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
- return;
- }
-
- if (intel->gen < 6) {
- brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
- brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
- brw_set_src1(p, insn, brw_imm_d(0x0));
- } else if (intel->gen == 6) {
- brw_set_dest(p, insn, brw_imm_w(0));
- brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- } else {
- brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(p, insn, brw_imm_ud(0));
- }
-
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.mask_control = BRW_MASK_ENABLE;
- insn->header.thread_control = BRW_THREAD_SWITCH;
-
- /* Also pop item off the stack in the endif instruction: */
- if (intel->gen < 6) {
- insn->bits3.if_else.jump_count = 0;
- insn->bits3.if_else.pop_count = 1;
- insn->bits3.if_else.pad0 = 0;
- } else if (intel->gen == 6) {
- insn->bits1.branch_gen6.jump_count = 2;
- } else {
- insn->bits3.break_cont.jip = 2;
- }
- patch_IF_ELSE(p, if_inst, else_inst, insn);
-}
-
-struct brw_instruction *brw_BREAK(struct brw_compile *p)
-{
- struct intel_context *intel = &p->brw->intel;
- struct brw_instruction *insn;
-
- insn = next_insn(p, BRW_OPCODE_BREAK);
- if (intel->gen >= 6) {
- brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(p, insn, brw_imm_d(0x0));
- } else {
- brw_set_dest(p, insn, brw_ip_reg());
- brw_set_src0(p, insn, brw_ip_reg());
- brw_set_src1(p, insn, brw_imm_d(0x0));
- insn->bits3.if_else.pad0 = 0;
- insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
- }
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.execution_size = BRW_EXECUTE_8;
-
- return insn;
-}
-
-struct brw_instruction *gen6_CONT(struct brw_compile *p)
-{
- struct brw_instruction *insn;
-
- insn = next_insn(p, BRW_OPCODE_CONTINUE);
- brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_dest(p, insn, brw_ip_reg());
- brw_set_src0(p, insn, brw_ip_reg());
- brw_set_src1(p, insn, brw_imm_d(0x0));
-
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.execution_size = BRW_EXECUTE_8;
- return insn;
-}
-
-struct brw_instruction *brw_CONT(struct brw_compile *p)
-{
- struct brw_instruction *insn;
- insn = next_insn(p, BRW_OPCODE_CONTINUE);
- brw_set_dest(p, insn, brw_ip_reg());
- brw_set_src0(p, insn, brw_ip_reg());
- brw_set_src1(p, insn, brw_imm_d(0x0));
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.execution_size = BRW_EXECUTE_8;
- /* insn->header.mask_control = BRW_MASK_DISABLE; */
- insn->bits3.if_else.pad0 = 0;
- insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
- return insn;
-}
-
-struct brw_instruction *gen6_HALT(struct brw_compile *p)
-{
- struct brw_instruction *insn;
-
- insn = next_insn(p, BRW_OPCODE_HALT);
- brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
-
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.execution_size = BRW_EXECUTE_8;
- return insn;
-}
-
-/* DO/WHILE loop:
- *
- * The DO/WHILE is just an unterminated loop -- break or continue are
- * used for control within the loop. We have a few ways they can be
- * done.
- *
- * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
- * jip and no DO instruction.
- *
- * For non-uniform control flow pre-gen6, there's a DO instruction to
- * push the mask, and a WHILE to jump back, and BREAK to get out and
- * pop the mask.
- *
- * For gen6, there's no more mask stack, so no need for DO. WHILE
- * just points back to the first instruction of the loop.
- */
-struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
-{
- struct intel_context *intel = &p->brw->intel;
-
- if (intel->gen >= 6 || p->single_program_flow) {
- push_loop_stack(p, &p->store[p->nr_insn]);
- return &p->store[p->nr_insn];
- } else {
- struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
-
- push_loop_stack(p, insn);
-
- /* Override the defaults for this instruction:
- */
- brw_set_dest(p, insn, brw_null_reg());
- brw_set_src0(p, insn, brw_null_reg());
- brw_set_src1(p, insn, brw_null_reg());
-
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.execution_size = execute_size;
- insn->header.predicate_control = BRW_PREDICATE_NONE;
- /* insn->header.mask_control = BRW_MASK_ENABLE; */
- /* insn->header.mask_control = BRW_MASK_DISABLE; */
-
- return insn;
- }
-}
-
-/**
- * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
- * instruction here.
- *
- * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
- * nesting, since it can always just point to the end of the block/current loop.
- */
-static void
-brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
-{
- struct intel_context *intel = &p->brw->intel;
- struct brw_instruction *do_inst = get_inner_do_insn(p);
- struct brw_instruction *inst;
- int br = (intel->gen == 5) ? 2 : 1;
-
- for (inst = while_inst - 1; inst != do_inst; inst--) {
- /* If the jump count is != 0, that means that this instruction has already
- * been patched because it's part of a loop inside of the one we're
- * patching.
- */
- if (inst->header.opcode == BRW_OPCODE_BREAK &&
- inst->bits3.if_else.jump_count == 0) {
- inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
- } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
- inst->bits3.if_else.jump_count == 0) {
- inst->bits3.if_else.jump_count = br * (while_inst - inst);
- }
- }
-}
-
-struct brw_instruction *brw_WHILE(struct brw_compile *p)
-{
- struct intel_context *intel = &p->brw->intel;
- struct brw_instruction *insn, *do_insn;
- GLuint br = 1;
-
- if (intel->gen >= 5)
- br = 2;
-
- if (intel->gen >= 7) {
- insn = next_insn(p, BRW_OPCODE_WHILE);
- do_insn = get_inner_do_insn(p);
-
- brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(p, insn, brw_imm_ud(0));
- insn->bits3.break_cont.jip = br * (do_insn - insn);
-
- insn->header.execution_size = BRW_EXECUTE_8;
- } else if (intel->gen == 6) {
- insn = next_insn(p, BRW_OPCODE_WHILE);
- do_insn = get_inner_do_insn(p);
-
- brw_set_dest(p, insn, brw_imm_w(0));
- insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
- brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-
- insn->header.execution_size = BRW_EXECUTE_8;
- } else {
- if (p->single_program_flow) {
- insn = next_insn(p, BRW_OPCODE_ADD);
- do_insn = get_inner_do_insn(p);
-
- brw_set_dest(p, insn, brw_ip_reg());
- brw_set_src0(p, insn, brw_ip_reg());
- brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
- insn->header.execution_size = BRW_EXECUTE_1;
- } else {
- insn = next_insn(p, BRW_OPCODE_WHILE);
- do_insn = get_inner_do_insn(p);
-
- assert(do_insn->header.opcode == BRW_OPCODE_DO);
-
- brw_set_dest(p, insn, brw_ip_reg());
- brw_set_src0(p, insn, brw_ip_reg());
- brw_set_src1(p, insn, brw_imm_d(0));
-
- insn->header.execution_size = do_insn->header.execution_size;
- insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
- insn->bits3.if_else.pop_count = 0;
- insn->bits3.if_else.pad0 = 0;
-
- brw_patch_break_cont(p, insn);
- }
- }
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- p->current->header.predicate_control = BRW_PREDICATE_NONE;
-
- p->loop_stack_depth--;
-
- return insn;
-}
-
-
-/* FORWARD JUMPS:
- */
-void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
-{
- struct intel_context *intel = &p->brw->intel;
- struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
- GLuint jmpi = 1;
-
- if (intel->gen >= 5)
- jmpi = 2;
-
- assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
- assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
-
- jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
-}
-
-
-
/* To integrate with the above, it makes sense that the comparison
* instruction should populate the flag register. It might be simpler
* just to use the flag reg for most WM tasks?
*/
void brw_CMP(struct brw_compile *p,
- struct brw_reg dest,
- GLuint conditional,
- struct brw_reg src0,
- struct brw_reg src1)
+ struct brw_reg dest,
+ uint32_t conditional,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
* Helpers for the various SEND message types:
*/
-/** Extended math function, float[8].
- */
-void brw_math( struct brw_compile *p,
- struct brw_reg dest,
- GLuint function,
- GLuint saturate,
- GLuint msg_reg_nr,
- struct brw_reg src,
- GLuint data_type,
- GLuint precision )
+/** Extended math function, float[8]. */
+void brw_math(struct brw_compile *p,
+ struct brw_reg dest,
+ uint32_t function,
+ uint32_t saturate,
+ uint32_t msg_reg_nr,
+ struct brw_reg src,
+ uint32_t data_type,
+ uint32_t precision)
{
- struct intel_context *intel = &p->brw->intel;
-
- if (intel->gen >= 6) {
+ if (p->gen >= 6) {
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
assert(src.file == BRW_GENERAL_REGISTER_FILE);
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
- if (intel->gen == 6)
- assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
+ if (p->gen == 6)
+ assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
/* Source modifiers are ignored for extended math instructions on Gen6. */
- if (intel->gen == 6) {
- assert(!src.negate);
- assert(!src.abs);
+ if (p->gen == 6) {
+ assert(!src.negate);
+ assert(!src.abs);
}
if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
- function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
- function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
- assert(src.type != BRW_REGISTER_TYPE_F);
+ function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
+ function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
+ assert(src.type != BRW_REGISTER_TYPE_F);
} else {
- assert(src.type == BRW_REGISTER_TYPE_F);
+ assert(src.type == BRW_REGISTER_TYPE_F);
}
/* Math is the same ISA format as other opcodes, except that CondModifier
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
brw_set_math_message(p,
- insn,
- function,
- src.type == BRW_REGISTER_TYPE_D,
- precision,
- saturate,
- data_type);
+ insn,
+ function,
+ src.type == BRW_REGISTER_TYPE_D,
+ precision,
+ saturate,
+ data_type);
}
}
/** Extended math function, float[8].
*/
void brw_math2(struct brw_compile *p,
- struct brw_reg dest,
- GLuint function,
- struct brw_reg src0,
- struct brw_reg src1)
+ struct brw_reg dest,
+ uint32_t function,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
- struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
- assert(intel->gen >= 6);
- (void) intel;
+ assert(p->gen >= 6);
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
assert(src1.file == BRW_GENERAL_REGISTER_FILE);
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
- if (intel->gen == 6) {
+ if (p->gen == 6) {
assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
}
}
/* Source modifiers are ignored for extended math instructions on Gen6. */
- if (intel->gen == 6) {
+ if (p->gen == 6) {
assert(!src0.negate);
assert(!src0.abs);
assert(!src1.negate);
* Extended math function, float[16].
* Use 2 send instructions.
*/
-void brw_math_16( struct brw_compile *p,
- struct brw_reg dest,
- GLuint function,
- GLuint saturate,
- GLuint msg_reg_nr,
- struct brw_reg src,
- GLuint precision )
+void brw_math_16(struct brw_compile *p,
+ struct brw_reg dest,
+ uint32_t function,
+ uint32_t saturate,
+ uint32_t msg_reg_nr,
+ struct brw_reg src,
+ uint32_t precision)
{
- struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
- if (intel->gen >= 6) {
+ if (p->gen >= 6) {
insn = next_insn(p, BRW_OPCODE_MATH);
/* Math is the same ISA format as other opcodes, except that CondModifier
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
brw_set_math_message(p,
- insn,
- function,
- BRW_MATH_INTEGER_UNSIGNED,
- precision,
- saturate,
- BRW_MATH_DATA_VECTOR);
+ insn,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ BRW_MATH_DATA_VECTOR);
/* Second instruction:
*/
brw_set_dest(p, insn, offset(dest,1));
brw_set_src0(p, insn, src);
brw_set_math_message(p,
- insn,
- function,
- BRW_MATH_INTEGER_UNSIGNED,
- precision,
- saturate,
- BRW_MATH_DATA_VECTOR);
+ insn,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ BRW_MATH_DATA_VECTOR);
brw_pop_insn_state(p);
}
* register spilling.
*/
void brw_oword_block_write_scratch(struct brw_compile *p,
- struct brw_reg mrf,
- int num_regs,
- GLuint offset)
+ struct brw_reg mrf,
+ int num_regs,
+ uint32_t offset)
{
- struct intel_context *intel = &p->brw->intel;
uint32_t msg_control, msg_type;
int mlen;
- if (intel->gen >= 6)
+ if (p->gen >= 6)
offset /= 16;
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
/* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
- retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
- mrf.nr,
- 2), BRW_REGISTER_TYPE_UD),
- brw_imm_ud(offset));
+ retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+ mrf.nr,
+ 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(offset));
brw_pop_insn_state(p);
}
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
int send_commit_msg;
struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
- BRW_REGISTER_TYPE_UW);
+ BRW_REGISTER_TYPE_UW);
if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- src_header = vec16(src_header);
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ src_header = vec16(src_header);
}
assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
insn->header.destreg__conditionalmod = mrf.nr;
* protection. Our use of DP writes is all about register
* spilling within a thread.
*/
- if (intel->gen >= 6) {
- dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
- send_commit_msg = 0;
+ if (p->gen >= 6) {
+ dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+ send_commit_msg = 0;
} else {
- dest = src_header;
- send_commit_msg = 1;
+ dest = src_header;
+ send_commit_msg = 1;
}
brw_set_dest(p, insn, dest);
- if (intel->gen >= 6) {
- brw_set_src0(p, insn, mrf);
+ if (p->gen >= 6) {
+ brw_set_src0(p, insn, mrf);
} else {
- brw_set_src0(p, insn, brw_null_reg());
+ brw_set_src0(p, insn, brw_null_reg());
}
- if (intel->gen >= 6)
- msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
+ if (p->gen >= 6)
+ msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
else
- msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
+ msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
brw_set_dp_write_message(p,
- insn,
- 255, /* binding table index (255=stateless) */
- msg_control,
- msg_type,
- mlen,
- true, /* header_present */
- 0, /* not a render target */
- send_commit_msg, /* response_length */
- 0, /* eot */
- send_commit_msg);
+ insn,
+ 255, /* binding table index (255=stateless) */
+ msg_control,
+ msg_type,
+ mlen,
+ true, /* header_present */
+ 0, /* not a render target */
+ send_commit_msg, /* response_length */
+ 0, /* eot */
+ send_commit_msg);
}
}
*/
void
brw_oword_block_read_scratch(struct brw_compile *p,
- struct brw_reg dest,
- struct brw_reg mrf,
- int num_regs,
- GLuint offset)
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ int num_regs,
+ uint32_t offset)
{
- struct intel_context *intel = &p->brw->intel;
uint32_t msg_control;
int rlen;
- if (intel->gen >= 6)
+ if (p->gen >= 6)
offset /= 16;
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
/* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
- retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
- mrf.nr,
- 2), BRW_REGISTER_TYPE_UD),
- brw_imm_ud(offset));
+ retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+ mrf.nr,
+ 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(offset));
brw_pop_insn_state(p);
}
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.destreg__conditionalmod = mrf.nr;
- brw_set_dest(p, insn, dest); /* UW? */
- if (intel->gen >= 6) {
- brw_set_src0(p, insn, mrf);
+ brw_set_dest(p, insn, dest); /* UW? */
+ if (p->gen >= 6) {
+ brw_set_src0(p, insn, mrf);
} else {
- brw_set_src0(p, insn, brw_null_reg());
+ brw_set_src0(p, insn, brw_null_reg());
}
brw_set_dp_read_message(p,
- insn,
- 255, /* binding table index (255=stateless) */
- msg_control,
- BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
- BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
- 1, /* msg_length */
- rlen);
+ insn,
+ 255, /* binding table index (255=stateless) */
+ msg_control,
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
+ 1, /* msg_length */
+ rlen);
}
}
* Used for fetching shader constants.
*/
void brw_oword_block_read(struct brw_compile *p,
- struct brw_reg dest,
- struct brw_reg mrf,
- uint32_t offset,
- uint32_t bind_table_index)
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ uint32_t offset,
+ uint32_t bind_table_index)
{
- struct intel_context *intel = &p->brw->intel;
/* On newer hardware, offset is in units of owords. */
- if (intel->gen >= 6)
+ if (p->gen >= 6)
offset /= 16;
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
/* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
- retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
- mrf.nr,
- 2), BRW_REGISTER_TYPE_UD),
- brw_imm_ud(offset));
+ retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+ mrf.nr,
+ 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(offset));
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.destreg__conditionalmod = mrf.nr;
dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
brw_set_dest(p, insn, dest);
- if (intel->gen >= 6) {
+ if (p->gen >= 6) {
brw_set_src0(p, insn, mrf);
} else {
brw_set_src0(p, insn, brw_null_reg());
}
brw_set_dp_read_message(p,
- insn,
- bind_table_index,
- BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
- BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
- BRW_DATAPORT_READ_TARGET_DATA_CACHE,
- 1, /* msg_length */
- 1); /* response_length (1 reg, 2 owords!) */
+ insn,
+ bind_table_index,
+ BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
+ BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+ 1, /* msg_length */
+ 1); /* response_length (1 reg, 2 owords!) */
brw_pop_insn_state(p);
}
* the provided mrf header reg.
*/
void brw_dword_scattered_read(struct brw_compile *p,
- struct brw_reg dest,
- struct brw_reg mrf,
- uint32_t bind_table_index)
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ uint32_t bind_table_index)
{
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
brw_set_src0(p, insn, brw_null_reg());
brw_set_dp_read_message(p,
- insn,
- bind_table_index,
- BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
- BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
- BRW_DATAPORT_READ_TARGET_DATA_CACHE,
- 2, /* msg_length */
- 1); /* response_length */
+ insn,
+ bind_table_index,
+ BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
+ BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
+ BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+ 2, /* msg_length */
+ 1); /* response_length */
}
-
-
/**
* Read float[4] constant(s) from VS constant buffer.
* For relative addressing, two float[4] constants will be read into 'dest'.
*/
void brw_dp_READ_4_vs(struct brw_compile *p,
struct brw_reg dest,
- GLuint location,
- GLuint bind_table_index)
+ uint32_t location,
+ uint32_t bind_table_index)
{
- struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
- GLuint msg_reg_nr = 1;
+ uint32_t msg_reg_nr = 1;
- if (intel->gen >= 6)
+ if (p->gen >= 6)
location /= 16;
/* Setup MRF[1] with location/offset into const buffer */
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
- BRW_REGISTER_TYPE_UD),
- brw_imm_ud(location));
+ BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(location));
brw_pop_insn_state(p);
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.mask_control = BRW_MASK_DISABLE;
brw_set_dest(p, insn, dest);
- if (intel->gen >= 6) {
+ if (p->gen >= 6) {
brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
} else {
brw_set_src0(p, insn, brw_null_reg());
}
brw_set_dp_read_message(p,
- insn,
- bind_table_index,
- 0,
- BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
- BRW_DATAPORT_READ_TARGET_DATA_CACHE,
- 1, /* msg_length */
- 1); /* response_length (1 Oword) */
+ insn,
+ bind_table_index,
+ 0,
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+ 1, /* msg_length */
+ 1); /* response_length (1 Oword) */
}
/**
* relative addressing.
*/
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
- struct brw_reg dest,
- struct brw_reg addr_reg,
- GLuint offset,
- GLuint bind_table_index)
+ struct brw_reg dest,
+ struct brw_reg addr_reg,
+ uint32_t offset,
+ uint32_t bind_table_index)
{
- struct intel_context *intel = &p->brw->intel;
struct brw_reg src = brw_vec8_grf(0, 0);
int msg_type;
* fields ignored.
*/
brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
- addr_reg, brw_imm_d(offset));
+ addr_reg, brw_imm_d(offset));
brw_pop_insn_state(p);
gen6_resolve_implied_move(p, &src, 0);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
- if (intel->gen >= 6)
- msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
- else if (intel->gen == 5 || intel->is_g4x)
- msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
- else
- msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+ msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
brw_set_dp_read_message(p,
- insn,
- bind_table_index,
- BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
- msg_type,
- BRW_DATAPORT_READ_TARGET_DATA_CACHE,
- 2, /* msg_length */
- 1); /* response_length */
+ insn,
+ bind_table_index,
+ BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+ msg_type,
+ BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+ 2, /* msg_length */
+ 1); /* response_length */
}
-
-
void brw_fb_WRITE(struct brw_compile *p,
- int dispatch_width,
- GLuint msg_reg_nr,
+ int dispatch_width,
+ uint32_t msg_reg_nr,
struct brw_reg src0,
- GLuint binding_table_index,
- GLuint msg_length,
- GLuint response_length,
+ uint32_t binding_table_index,
+ uint32_t msg_length,
+ uint32_t response_length,
bool eot,
bool header_present)
{
- struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
- GLuint msg_control, msg_type;
+ uint32_t msg_control, msg_type;
struct brw_reg dest;
if (dispatch_width == 16)
else
dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
- if (intel->gen >= 6 && binding_table_index == 0) {
+ if (p->gen >= 6 && binding_table_index == 0) {
insn = next_insn(p, BRW_OPCODE_SENDC);
} else {
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = 0;
insn->header.compression_control = BRW_COMPRESSION_NONE;
- if (intel->gen >= 6) {
+ if (p->gen >= 6) {
/* headerless version, just submit color payload */
src0 = brw_message_reg(msg_reg_nr);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_dp_write_message(p,
- insn,
- binding_table_index,
- msg_control,
- msg_type,
- msg_length,
- header_present,
- eot, /* last render target write */
- response_length,
- eot,
- 0 /* send_commit_msg */);
+ insn,
+ binding_table_index,
+ msg_control,
+ msg_type,
+ msg_length,
+ header_present,
+ eot, /* last render target write */
+ response_length,
+ eot,
+ 0 /* send_commit_msg */);
}
* of sampling operation is performed. See volume 4, page 161 of docs.
*/
void brw_SAMPLE(struct brw_compile *p,
- struct brw_reg dest,
- GLuint msg_reg_nr,
- struct brw_reg src0,
- GLuint binding_table_index,
- GLuint sampler,
- GLuint writemask,
- GLuint msg_type,
- GLuint response_length,
- GLuint msg_length,
- GLuint header_present,
- GLuint simd_mode,
- GLuint return_format)
+ struct brw_reg dest,
+ uint32_t msg_reg_nr,
+ struct brw_reg src0,
+ uint32_t binding_table_index,
+ uint32_t sampler,
+ uint32_t writemask,
+ uint32_t msg_type,
+ uint32_t response_length,
+ uint32_t msg_length,
+ uint32_t header_present,
+ uint32_t simd_mode,
+ uint32_t return_format)
{
- struct intel_context *intel = &p->brw->intel;
bool need_stall = 0;
if (writemask == 0) {
* needed.
*/
if (writemask != WRITEMASK_XYZW) {
- GLuint dst_offset = 0;
- GLuint i, newmask = 0, len = 0;
+ uint32_t dst_offset = 0;
+ uint32_t i, newmask = 0, len = 0;
for (i = 0; i < 4; i++) {
- if (writemask & (1<<i))
- break;
- dst_offset += 2;
+ if (writemask & (1<<i))
+ break;
+ dst_offset += 2;
}
for (; i < 4; i++) {
- if (!(writemask & (1<<i)))
- break;
- newmask |= 1<<i;
- len++;
+ if (!(writemask & (1<<i)))
+ break;
+ newmask |= 1<<i;
+ len++;
}
if (newmask != writemask) {
- need_stall = 1;
+ need_stall = 1;
/* printf("need stall %x %x\n", newmask , writemask); */
}
else {
- bool dispatch_16 = false;
+ bool dispatch_16 = false;
- struct brw_reg m1 = brw_message_reg(msg_reg_nr);
+ struct brw_reg m1 = brw_message_reg(msg_reg_nr);
- guess_execution_size(p, p->current, dest);
- if (p->current->header.execution_size == BRW_EXECUTE_16)
- dispatch_16 = true;
+ guess_execution_size(p, p->current, dest);
+ if (p->current->header.execution_size == BRW_EXECUTE_16)
+ dispatch_16 = true;
- newmask = ~newmask & WRITEMASK_XYZW;
+ newmask = ~newmask & WRITEMASK_XYZW;
- brw_push_insn_state(p);
+ brw_push_insn_state(p);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
- retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
- brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
+ brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
+ retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
- brw_pop_insn_state(p);
+ brw_pop_insn_state(p);
- src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
- dest = offset(dest, dst_offset);
+ src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+ dest = offset(dest, dst_offset);
- /* For 16-wide dispatch, masked channels are skipped in the
- * response. For 8-wide, masked channels still take up slots,
- * and are just not written to.
- */
- if (dispatch_16)
- response_length = len * 2;
+ /* For 16-wide dispatch, masked channels are skipped in the
+ * response. For 8-wide, masked channels still take up slots,
+ * and are just not written to.
+ */
+ if (dispatch_16)
+ response_length = len * 2;
}
}
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
- if (intel->gen < 6)
- insn->header.destreg__conditionalmod = msg_reg_nr;
+ if (p->gen < 6)
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_sampler_message(p, insn,
- binding_table_index,
- sampler,
- msg_type,
- response_length,
- msg_length,
- header_present,
- simd_mode,
- return_format);
+ binding_table_index,
+ sampler,
+ msg_type,
+ response_length,
+ msg_length,
+ header_present,
+ simd_mode,
+ return_format);
}
if (need_stall) {
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
- retype(reg, BRW_REGISTER_TYPE_UD));
+ retype(reg, BRW_REGISTER_TYPE_UD));
brw_pop_insn_state(p);
}
}
-/* All these variables are pretty confusing - we might be better off
- * using bitmasks and macros for this, in the old style. Or perhaps
- * just having the caller instantiate the fields in dword3 itself.
- */
-void brw_urb_WRITE(struct brw_compile *p,
- struct brw_reg dest,
- GLuint msg_reg_nr,
- struct brw_reg src0,
- bool allocate,
- bool used,
- GLuint msg_length,
- GLuint response_length,
- bool eot,
- bool writes_complete,
- GLuint offset,
- GLuint swizzle)
-{
- struct intel_context *intel = &p->brw->intel;
- struct brw_instruction *insn;
-
- gen6_resolve_implied_move(p, &src0, msg_reg_nr);
-
- if (intel->gen == 7) {
- /* Enable Channel Masks in the URB_WRITE_HWORD message header */
- brw_push_insn_state(p);
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
- BRW_REGISTER_TYPE_UD),
- retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
- brw_imm_ud(0xff00));
- brw_pop_insn_state(p);
- }
-
- insn = next_insn(p, BRW_OPCODE_SEND);
-
- assert(msg_length < BRW_MAX_MRF);
-
- brw_set_dest(p, insn, dest);
- brw_set_src0(p, insn, src0);
- brw_set_src1(p, insn, brw_imm_d(0));
-
- if (intel->gen < 6)
- insn->header.destreg__conditionalmod = msg_reg_nr;
-
- brw_set_urb_message(p,
- insn,
- allocate,
- used,
- msg_length,
- response_length,
- eot,
- writes_complete,
- offset,
- swizzle);
-}
-
-static int
-brw_find_next_block_end(struct brw_compile *p, int start)
-{
- int ip;
-
- for (ip = start + 1; ip < p->nr_insn; ip++) {
- struct brw_instruction *insn = &p->store[ip];
-
- switch (insn->header.opcode) {
- case BRW_OPCODE_ENDIF:
- case BRW_OPCODE_ELSE:
- case BRW_OPCODE_WHILE:
- return ip;
- }
- }
-
- return 0;
-}
-
-/* There is no DO instruction on gen6, so to find the end of the loop
- * we have to see if the loop is jumping back before our start
- * instruction.
- */
-static int
-brw_find_loop_end(struct brw_compile *p, int start)
-{
- struct intel_context *intel = &p->brw->intel;
- int ip;
- int br = 2;
-
- for (ip = start + 1; ip < p->nr_insn; ip++) {
- struct brw_instruction *insn = &p->store[ip];
-
- if (insn->header.opcode == BRW_OPCODE_WHILE) {
- int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
- : insn->bits3.break_cont.jip;
- if (ip + jip / br <= start)
- return ip;
- }
- }
- assert(!"not reached");
- return start + 1;
-}
-
-/* After program generation, go back and update the UIP and JIP of
- * BREAK, CONT, and HALT instructions to their correct locations.
- */
-void
-brw_set_uip_jip(struct brw_compile *p)
-{
- struct intel_context *intel = &p->brw->intel;
- int ip;
- int br = 2;
-
- if (intel->gen < 6)
- return;
-
- for (ip = 0; ip < p->nr_insn; ip++) {
- struct brw_instruction *insn = &p->store[ip];
- int block_end_ip = 0;
-
- if (insn->header.opcode == BRW_OPCODE_BREAK ||
- insn->header.opcode == BRW_OPCODE_CONTINUE ||
- insn->header.opcode == BRW_OPCODE_HALT) {
- block_end_ip = brw_find_next_block_end(p, ip);
- }
-
- switch (insn->header.opcode) {
- case BRW_OPCODE_BREAK:
- assert(block_end_ip != 0);
- insn->bits3.break_cont.jip = br * (block_end_ip - ip);
- /* Gen7 UIP points to WHILE; Gen6 points just after it */
- insn->bits3.break_cont.uip =
- br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
- break;
- case BRW_OPCODE_CONTINUE:
- assert(block_end_ip != 0);
- insn->bits3.break_cont.jip = br * (block_end_ip - ip);
- insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
-
- assert(insn->bits3.break_cont.uip != 0);
- assert(insn->bits3.break_cont.jip != 0);
- break;
- case BRW_OPCODE_HALT:
- /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
- *
- * "In case of the halt instruction not inside any conditional code
- * block, the value of <JIP> and <UIP> should be the same. In case
- * of the halt instruction inside conditional code block, the <UIP>
- * should be the end of the program, and the <JIP> should be end of
- * the most inner conditional code block."
- *
- * The uip will have already been set by whoever set up the
- * instruction.
- */
- if (block_end_ip == 0) {
- insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
- } else {
- insn->bits3.break_cont.jip = br * (block_end_ip - ip);
- }
- assert(insn->bits3.break_cont.uip != 0);
- assert(insn->bits3.break_cont.jip != 0);
- break;
- }
- }
-}
-
-void brw_ff_sync(struct brw_compile *p,
- struct brw_reg dest,
- GLuint msg_reg_nr,
- struct brw_reg src0,
- bool allocate,
- GLuint response_length,
- bool eot)
-{
- struct intel_context *intel = &p->brw->intel;
- struct brw_instruction *insn;
-
- gen6_resolve_implied_move(p, &src0, msg_reg_nr);
-
- insn = next_insn(p, BRW_OPCODE_SEND);
- brw_set_dest(p, insn, dest);
- brw_set_src0(p, insn, src0);
- brw_set_src1(p, insn, brw_imm_d(0));
-
- if (intel->gen < 6)
- insn->header.destreg__conditionalmod = msg_reg_nr;
-
- brw_set_ff_sync_message(p,
- insn,
- allocate,
- response_length,
- eot);
-}
-
-/**
- * Emit the SEND instruction necessary to generate stream output data on Gen6
- * (for transform feedback).
- *
- * If send_commit_msg is true, this is the last piece of stream output data
- * from this thread, so send the data as a committed write. According to the
- * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
- *
- * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
- * writes are complete by sending the final write as a committed write."
- */
-void
-brw_svb_write(struct brw_compile *p,
- struct brw_reg dest,
- GLuint msg_reg_nr,
- struct brw_reg src0,
- GLuint binding_table_index,
- bool send_commit_msg)
-{
- struct brw_instruction *insn;
-
- gen6_resolve_implied_move(p, &src0, msg_reg_nr);
-
- insn = next_insn(p, BRW_OPCODE_SEND);
- brw_set_dest(p, insn, dest);
- brw_set_src0(p, insn, src0);
- brw_set_src1(p, insn, brw_imm_d(0));
- brw_set_dp_write_message(p, insn,
- binding_table_index,
- 0, /* msg_control: ignored */
- GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
- 1, /* msg_length */
- true, /* header_present */
- 0, /* last_render_target: ignored */
- send_commit_msg, /* response_length */
- 0, /* end_of_thread */
- send_commit_msg); /* send_commit_msg */
-}
--- /dev/null
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+#ifndef BRW_STRUCTS_H
+#define BRW_STRUCTS_H
+
+#include <stdint.h>
+
+/** Number of general purpose registers (VS, WM, etc) */
+#define BRW_MAX_GRF 128
+
+/**
+ * First GRF used for the MRF hack.
+ *
+ * On gen7, MRFs are no longer used, and contiguous GRFs are used instead. We
+ * haven't converted our compiler to be aware of this, so it asks for MRFs and
+ * brw_eu_emit.c quietly converts them to be accesses of the top GRFs. The
+ * register allocators have to be careful of this to avoid corrupting the "MRF"s
+ * with actual GRF allocations.
+ */
+#define GEN7_MRF_HACK_START 112.
+
+/** Number of message register file registers */
+#define BRW_MAX_MRF 16
+
+struct brw_urb_immediate {
+ uint32_t opcode:4;
+ uint32_t offset:6;
+ uint32_t swizzle_control:2;
+ uint32_t pad:1;
+ uint32_t allocate:1;
+ uint32_t used:1;
+ uint32_t complete:1;
+ uint32_t response_length:4;
+ uint32_t msg_length:4;
+ uint32_t msg_target:4;
+ uint32_t pad1:3;
+ uint32_t end_of_thread:1;
+};
+
+struct brw_sampler_state
+{
+ struct
+ {
+ uint32_t shadow_function:3;
+ uint32_t lod_bias:11;
+ uint32_t min_filter:3;
+ uint32_t mag_filter:3;
+ uint32_t mip_filter:2;
+ uint32_t base_level:5;
+ uint32_t min_mag_neq:1;
+ uint32_t lod_preclamp:1;
+ uint32_t default_color_mode:1;
+ uint32_t pad0:1;
+ uint32_t disable:1;
+ } ss0;
+
+ struct
+ {
+ uint32_t r_wrap_mode:3;
+ uint32_t t_wrap_mode:3;
+ uint32_t s_wrap_mode:3;
+ uint32_t cube_control_mode:1;
+ uint32_t pad:2;
+ uint32_t max_lod:10;
+ uint32_t min_lod:10;
+ } ss1;
+
+ struct
+ {
+ uint32_t pad:5;
+ uint32_t default_color_pointer:27;
+ } ss2;
+
+ struct
+ {
+ uint32_t non_normalized_coord:1;
+ uint32_t pad:12;
+ uint32_t address_round:6;
+ uint32_t max_aniso:3;
+ uint32_t chroma_key_mode:1;
+ uint32_t chroma_key_index:2;
+ uint32_t chroma_key_enable:1;
+ uint32_t monochrome_filter_width:3;
+ uint32_t monochrome_filter_height:3;
+ } ss3;
+};
+
+struct gen7_sampler_state
+{
+ struct
+ {
+ uint32_t aniso_algorithm:1;
+ uint32_t lod_bias:13;
+ uint32_t min_filter:3;
+ uint32_t mag_filter:3;
+ uint32_t mip_filter:2;
+ uint32_t base_level:5;
+ uint32_t pad1:1;
+ uint32_t lod_preclamp:1;
+ uint32_t default_color_mode:1;
+ uint32_t pad0:1;
+ uint32_t disable:1;
+ } ss0;
+
+ struct
+ {
+ uint32_t cube_control_mode:1;
+ uint32_t shadow_function:3;
+ uint32_t pad:4;
+ uint32_t max_lod:12;
+ uint32_t min_lod:12;
+ } ss1;
+
+ struct
+ {
+ uint32_t pad:5;
+ uint32_t default_color_pointer:27;
+ } ss2;
+
+ struct
+ {
+ uint32_t r_wrap_mode:3;
+ uint32_t t_wrap_mode:3;
+ uint32_t s_wrap_mode:3;
+ uint32_t pad:1;
+ uint32_t non_normalized_coord:1;
+ uint32_t trilinear_quality:2;
+ uint32_t address_round:6;
+ uint32_t max_aniso:3;
+ uint32_t chroma_key_mode:1;
+ uint32_t chroma_key_index:2;
+ uint32_t chroma_key_enable:1;
+ uint32_t pad0:6;
+ } ss3;
+};
+
+/* Instruction format for the execution units */
+struct brw_instruction
+{
+ struct
+ {
+ uint32_t opcode:7;
+ uint32_t pad:1;
+ uint32_t access_mode:1;
+ uint32_t mask_control:1;
+ uint32_t dependency_control:2;
+ uint32_t compression_control:2; /* gen6: quater control */
+ uint32_t thread_control:2;
+ uint32_t predicate_control:4;
+ uint32_t predicate_inverse:1;
+ uint32_t execution_size:3;
+ /**
+ * Conditional Modifier for most instructions. On Gen6+, this is also
+ * used for the SEND instruction's Message Target/SFID.
+ */
+ uint32_t destreg__conditionalmod:4;
+ uint32_t acc_wr_control:1;
+ uint32_t cmpt_control:1;
+ uint32_t debug_control:1;
+ uint32_t saturate:1;
+ } header;
+
+ union {
+ struct
+ {
+ uint32_t dest_reg_file:2;
+ uint32_t dest_reg_type:3;
+ uint32_t src0_reg_file:2;
+ uint32_t src0_reg_type:3;
+ uint32_t src1_reg_file:2;
+ uint32_t src1_reg_type:3;
+ uint32_t pad:1;
+ uint32_t dest_subreg_nr:5;
+ uint32_t dest_reg_nr:8;
+ uint32_t dest_horiz_stride:2;
+ uint32_t dest_address_mode:1;
+ } da1;
+
+ struct
+ {
+ uint32_t dest_reg_file:2;
+ uint32_t dest_reg_type:3;
+ uint32_t src0_reg_file:2;
+ uint32_t src0_reg_type:3;
+ uint32_t src1_reg_file:2; /* 0x00000c00 */
+ uint32_t src1_reg_type:3; /* 0x00007000 */
+ uint32_t pad:1;
+ int dest_indirect_offset:10; /* offset against the deref'd address reg */
+ uint32_t dest_subreg_nr:3; /* subnr for the address reg a0.x */
+ uint32_t dest_horiz_stride:2;
+ uint32_t dest_address_mode:1;
+ } ia1;
+
+ struct
+ {
+ uint32_t dest_reg_file:2;
+ uint32_t dest_reg_type:3;
+ uint32_t src0_reg_file:2;
+ uint32_t src0_reg_type:3;
+ uint32_t src1_reg_file:2;
+ uint32_t src1_reg_type:3;
+ uint32_t pad:1;
+ uint32_t dest_writemask:4;
+ uint32_t dest_subreg_nr:1;
+ uint32_t dest_reg_nr:8;
+ uint32_t dest_horiz_stride:2;
+ uint32_t dest_address_mode:1;
+ } da16;
+
+ struct
+ {
+ uint32_t dest_reg_file:2;
+ uint32_t dest_reg_type:3;
+ uint32_t src0_reg_file:2;
+ uint32_t src0_reg_type:3;
+ uint32_t pad0:6;
+ uint32_t dest_writemask:4;
+ int dest_indirect_offset:6;
+ uint32_t dest_subreg_nr:3;
+ uint32_t dest_horiz_stride:2;
+ uint32_t dest_address_mode:1;
+ } ia16;
+
+ struct {
+ uint32_t dest_reg_file:2;
+ uint32_t dest_reg_type:3;
+ uint32_t src0_reg_file:2;
+ uint32_t src0_reg_type:3;
+ uint32_t src1_reg_file:2;
+ uint32_t src1_reg_type:3;
+ uint32_t pad:1;
+
+ int jump_count:16;
+ } branch_gen6;
+
+ struct {
+ uint32_t dest_reg_file:1;
+ uint32_t flag_subreg_num:1;
+ uint32_t pad0:2;
+ uint32_t src0_abs:1;
+ uint32_t src0_negate:1;
+ uint32_t src1_abs:1;
+ uint32_t src1_negate:1;
+ uint32_t src2_abs:1;
+ uint32_t src2_negate:1;
+ uint32_t pad1:7;
+ uint32_t dest_writemask:4;
+ uint32_t dest_subreg_nr:3;
+ uint32_t dest_reg_nr:8;
+ } da3src;
+ } bits1;
+
+
+ union {
+ struct
+ {
+ uint32_t src0_subreg_nr:5;
+ uint32_t src0_reg_nr:8;
+ uint32_t src0_abs:1;
+ uint32_t src0_negate:1;
+ uint32_t src0_address_mode:1;
+ uint32_t src0_horiz_stride:2;
+ uint32_t src0_width:3;
+ uint32_t src0_vert_stride:4;
+ uint32_t flag_reg_nr:1;
+ uint32_t pad:6;
+ } da1;
+
+ struct
+ {
+ int src0_indirect_offset:10;
+ uint32_t src0_subreg_nr:3;
+ uint32_t src0_abs:1;
+ uint32_t src0_negate:1;
+ uint32_t src0_address_mode:1;
+ uint32_t src0_horiz_stride:2;
+ uint32_t src0_width:3;
+ uint32_t src0_vert_stride:4;
+ uint32_t flag_reg_nr:1;
+ uint32_t pad:6;
+ } ia1;
+
+ struct
+ {
+ uint32_t src0_swz_x:2;
+ uint32_t src0_swz_y:2;
+ uint32_t src0_subreg_nr:1;
+ uint32_t src0_reg_nr:8;
+ uint32_t src0_abs:1;
+ uint32_t src0_negate:1;
+ uint32_t src0_address_mode:1;
+ uint32_t src0_swz_z:2;
+ uint32_t src0_swz_w:2;
+ uint32_t pad0:1;
+ uint32_t src0_vert_stride:4;
+ uint32_t flag_reg_nr:1;
+ uint32_t pad1:6;
+ } da16;
+
+ struct
+ {
+ uint32_t src0_swz_x:2;
+ uint32_t src0_swz_y:2;
+ int src0_indirect_offset:6;
+ uint32_t src0_subreg_nr:3;
+ uint32_t src0_abs:1;
+ uint32_t src0_negate:1;
+ uint32_t src0_address_mode:1;
+ uint32_t src0_swz_z:2;
+ uint32_t src0_swz_w:2;
+ uint32_t pad0:1;
+ uint32_t src0_vert_stride:4;
+ uint32_t flag_reg_nr:1;
+ uint32_t pad1:6;
+ } ia16;
+
+ /* Extended Message Descriptor for Ironlake (Gen5) SEND instruction.
+ *
+ * Does not apply to Gen6+. The SFID/message target moved to bits
+ * 27:24 of the header (destreg__conditionalmod); EOT is in bits3.
+ */
+ struct
+ {
+ uint32_t pad:26;
+ uint32_t end_of_thread:1;
+ uint32_t pad1:1;
+ uint32_t sfid:4;
+ } send_gen5; /* for Ironlake only */
+
+ struct {
+ uint32_t src0_rep_ctrl:1;
+ uint32_t src0_swizzle:8;
+ uint32_t src0_subreg_nr:3;
+ uint32_t src0_reg_nr:8;
+ uint32_t pad0:1;
+ uint32_t src1_rep_ctrl:1;
+ uint32_t src1_swizzle:8;
+ uint32_t src1_subreg_nr_low:2;
+ } da3src;
+ } bits2;
+
+ union
+ {
+ struct
+ {
+ uint32_t src1_subreg_nr:5;
+ uint32_t src1_reg_nr:8;
+ uint32_t src1_abs:1;
+ uint32_t src1_negate:1;
+ uint32_t src1_address_mode:1;
+ uint32_t src1_horiz_stride:2;
+ uint32_t src1_width:3;
+ uint32_t src1_vert_stride:4;
+ uint32_t pad0:7;
+ } da1;
+
+ struct
+ {
+ uint32_t src1_swz_x:2;
+ uint32_t src1_swz_y:2;
+ uint32_t src1_subreg_nr:1;
+ uint32_t src1_reg_nr:8;
+ uint32_t src1_abs:1;
+ uint32_t src1_negate:1;
+ uint32_t src1_address_mode:1;
+ uint32_t src1_swz_z:2;
+ uint32_t src1_swz_w:2;
+ uint32_t pad1:1;
+ uint32_t src1_vert_stride:4;
+ uint32_t pad2:7;
+ } da16;
+
+ struct
+ {
+ int src1_indirect_offset:10;
+ uint32_t src1_subreg_nr:3;
+ uint32_t src1_abs:1;
+ uint32_t src1_negate:1;
+ uint32_t src1_address_mode:1;
+ uint32_t src1_horiz_stride:2;
+ uint32_t src1_width:3;
+ uint32_t src1_vert_stride:4;
+ uint32_t flag_reg_nr:1;
+ uint32_t pad1:6;
+ } ia1;
+
+ struct
+ {
+ uint32_t src1_swz_x:2;
+ uint32_t src1_swz_y:2;
+ int src1_indirect_offset:6;
+ uint32_t src1_subreg_nr:3;
+ uint32_t src1_abs:1;
+ uint32_t src1_negate:1;
+ uint32_t pad0:1;
+ uint32_t src1_swz_z:2;
+ uint32_t src1_swz_w:2;
+ uint32_t pad1:1;
+ uint32_t src1_vert_stride:4;
+ uint32_t flag_reg_nr:1;
+ uint32_t pad2:6;
+ } ia16;
+
+
+ struct
+ {
+ int jump_count:16; /* note: signed */
+ uint32_t pop_count:4;
+ uint32_t pad0:12;
+ } if_else;
+
+ /* This is also used for gen7 IF/ELSE instructions */
+ struct
+ {
+ /* Signed jump distance to the ip to jump to if all channels
+ * are disabled after the break or continue. It should point
+ * to the end of the innermost control flow block, as that's
+ * where some channel could get re-enabled.
+ */
+ int jip:16;
+
+ /* Signed jump distance to the location to resume execution
+ * of this channel if it's enabled for the break or continue.
+ */
+ int uip:16;
+ } break_cont;
+
+ /**
+ * \defgroup SEND instructions / Message Descriptors
+ *
+ * @{
+ */
+
+ /**
+ * Generic Message Descriptor for Gen4 SEND instructions. The structs
+ * below expand function_control to something specific for their
+ * message. Due to struct packing issues, they duplicate these bits.
+ *
+ * See the G45 PRM, Volume 4, Table 14-15.
+ */
+ struct {
+ uint32_t function_control:16;
+ uint32_t response_length:4;
+ uint32_t msg_length:4;
+ uint32_t msg_target:4;
+ uint32_t pad1:3;
+ uint32_t end_of_thread:1;
+ } generic;
+
+ /**
+ * Generic Message Descriptor for Gen5-7 SEND instructions.
+ *
+ * See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most
+ * of the information on the SEND instruction is missing from the public
+ * Ironlake PRM.)
+ *
+ * The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies.
+ * According to the SEND instruction description:
+ * "The MSb of the message description, the EOT field, always comes from
+ * bit 127 of the instruction word"...which is bit 31 of this field.
+ */
+ struct {
+ uint32_t function_control:19;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } generic_gen5;
+
+ /** G45 PRM, Volume 4, Section 6.1.1.1 */
+ struct {
+ uint32_t function:4;
+ uint32_t int_type:1;
+ uint32_t precision:1;
+ uint32_t saturate:1;
+ uint32_t data_type:1;
+ uint32_t pad0:8;
+ uint32_t response_length:4;
+ uint32_t msg_length:4;
+ uint32_t msg_target:4;
+ uint32_t pad1:3;
+ uint32_t end_of_thread:1;
+ } math;
+
+ /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
+ struct {
+ uint32_t function:4;
+ uint32_t int_type:1;
+ uint32_t precision:1;
+ uint32_t saturate:1;
+ uint32_t data_type:1;
+ uint32_t snapshot:1;
+ uint32_t pad0:10;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } math_gen5;
+
+ /** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */
+ struct {
+ uint32_t binding_table_index:8;
+ uint32_t sampler:4;
+ uint32_t return_format:2;
+ uint32_t msg_type:2;
+ uint32_t response_length:4;
+ uint32_t msg_length:4;
+ uint32_t msg_target:4;
+ uint32_t pad1:3;
+ uint32_t end_of_thread:1;
+ } sampler;
+
+ /** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */
+ struct {
+ uint32_t binding_table_index:8;
+ uint32_t sampler:4;
+ uint32_t msg_type:4;
+ uint32_t response_length:4;
+ uint32_t msg_length:4;
+ uint32_t msg_target:4;
+ uint32_t pad1:3;
+ uint32_t end_of_thread:1;
+ } sampler_g4x;
+
+ /** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */
+ struct {
+ uint32_t binding_table_index:8;
+ uint32_t sampler:4;
+ uint32_t msg_type:4;
+ uint32_t simd_mode:2;
+ uint32_t pad0:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } sampler_gen5;
+
+ struct {
+ uint32_t binding_table_index:8;
+ uint32_t sampler:4;
+ uint32_t msg_type:5;
+ uint32_t simd_mode:2;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } sampler_gen7;
+
+ struct brw_urb_immediate urb;
+
+ struct {
+ uint32_t opcode:4;
+ uint32_t offset:6;
+ uint32_t swizzle_control:2;
+ uint32_t pad:1;
+ uint32_t allocate:1;
+ uint32_t used:1;
+ uint32_t complete:1;
+ uint32_t pad0:3;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } urb_gen5;
+
+ struct {
+ uint32_t opcode:3;
+ uint32_t offset:11;
+ uint32_t swizzle_control:1;
+ uint32_t complete:1;
+ uint32_t per_slot_offset:1;
+ uint32_t pad0:2;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } urb_gen7;
+
+ /** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */
+ struct {
+ uint32_t binding_table_index:8;
+ uint32_t msg_control:4;
+ uint32_t msg_type:2;
+ uint32_t target_cache:2;
+ uint32_t response_length:4;
+ uint32_t msg_length:4;
+ uint32_t msg_target:4;
+ uint32_t pad1:3;
+ uint32_t end_of_thread:1;
+ } dp_read;
+
+ /** G45 PRM, Volume 4, Section 5.10.1.1.2 */
+ struct {
+ uint32_t binding_table_index:8;
+ uint32_t msg_control:3;
+ uint32_t msg_type:3;
+ uint32_t target_cache:2;
+ uint32_t response_length:4;
+ uint32_t msg_length:4;
+ uint32_t msg_target:4;
+ uint32_t pad1:3;
+ uint32_t end_of_thread:1;
+ } dp_read_g4x;
+
+ /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
+ struct {
+ uint32_t binding_table_index:8;
+ uint32_t msg_control:3;
+ uint32_t msg_type:3;
+ uint32_t target_cache:2;
+ uint32_t pad0:3;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } dp_read_gen5;
+
+ /** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. */
+ struct {
+ uint32_t binding_table_index:8;
+ uint32_t msg_control:3;
+ uint32_t last_render_target:1;
+ uint32_t msg_type:3;
+ uint32_t send_commit_msg:1;
+ uint32_t response_length:4;
+ uint32_t msg_length:4;
+ uint32_t msg_target:4;
+ uint32_t pad1:3;
+ uint32_t end_of_thread:1;
+ } dp_write;
+
+ /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
+ struct {
+ uint32_t binding_table_index:8;
+ uint32_t msg_control:3;
+ uint32_t last_render_target:1;
+ uint32_t msg_type:3;
+ uint32_t send_commit_msg:1;
+ uint32_t pad0:3;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } dp_write_gen5;
+
+ /**
+ * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
+ *
+ * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
+ **/
+ struct {
+ uint32_t binding_table_index:8;
+ uint32_t msg_control:5;
+ uint32_t msg_type:3;
+ uint32_t pad0:3;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } gen6_dp_sampler_const_cache;
+
+ /**
+ * Message for the Sandybridge Render Cache Data Port.
+ *
+ * Most fields are defined in the Sandybridge PRM, Volume 4 Part 1,
+ * Section 3.9.2.1.1: Message Descriptor.
+ *
+ * "Slot Group Select" and "Last Render Target" are part of the
+ * 5-bit message control for Render Target Write messages. See
+ * Section 3.9.9.2.1 of the same volume.
+ */
+ struct {
+ uint32_t binding_table_index:8;
+ uint32_t msg_control:3;
+ uint32_t slot_group_select:1;
+ uint32_t last_render_target:1;
+ uint32_t msg_type:4;
+ uint32_t send_commit_msg:1;
+ uint32_t pad0:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad1:2;
+ uint32_t end_of_thread:1;
+ } gen6_dp;
+
+ /**
+ * Message for any of the Gen7 Data Port caches.
+ *
+ * Most fields are defined in BSpec volume 5c.2 Data Port / Messages /
+ * Data Port Messages / Message Descriptor. Once again, "Slot Group
+ * Select" and "Last Render Target" are part of the 6-bit message
+ * control for Render Target Writes.
+ */
+ struct {
+ uint32_t binding_table_index:8;
+ uint32_t msg_control:3;
+ uint32_t slot_group_select:1;
+ uint32_t last_render_target:1;
+ uint32_t msg_control_pad:1;
+ uint32_t msg_type:4;
+ uint32_t pad1:1;
+ uint32_t header_present:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } gen7_dp;
+ /** @} */
+
+ struct {
+ uint32_t src1_subreg_nr_high:1;
+ uint32_t src1_reg_nr:8;
+ uint32_t pad0:1;
+ uint32_t src2_rep_ctrl:1;
+ uint32_t src2_swizzle:8;
+ uint32_t src2_subreg_nr:3;
+ uint32_t src2_reg_nr:8;
+ uint32_t pad1:2;
+ } da3src;
+
+ int d;
+ uint32_t ud;
+ float f;
+ } bits3;
+};
+
+#endif
--- /dev/null
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+/**
+ * \file program.cpp
+ * \author Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+#include "gen/program.h"
+#include "gen/program.hpp"
+#include "ir/liveness.hpp"
+#include "ir/value.hpp"
+#include "ir/unit.hpp"
+#include "llvm/llvm_to_gen.hpp"
+
+namespace gbe {
+namespace gen {
+
+ Kernel::Kernel(void) :
+ args(NULL), insns(NULL), argNum(0), insnNum(0), liveness(NULL), dag(NULL)
+ {}
+ Kernel::~Kernel(void) {
+ GBE_SAFE_DELETE_ARRAY(insns);
+ GBE_SAFE_DELETE_ARRAY(args);
+ GBE_SAFE_DELETE(liveness);
+ GBE_SAFE_DELETE(dag);
+ }
+
+ Program::Program(void) {}
+ Program::~Program(void) {
+ for (auto it = kernels.begin(); it != kernels.end(); ++it)
+ GBE_DELETE(it->second);
+ }
+
+ bool Program::buildFromSource(const char *source, std::string &error) {
+ NOT_IMPLEMENTED;
+ return false;
+ }
+ bool Program::buildFromLLVMFile(const char *fileName, std::string &error) {
+ ir::Unit unit;
+ if (llvmToGen(unit, fileName) == false) {
+ error = std::string(fileName) + " not found";
+ return false;
+ }
+ this->buildFromUnit(unit, error);
+ return true;
+ }
+ bool Program::buildFromUnit(const ir::Unit &unit, std::string &error) {
+ return false;
+ }
+
+} /* namespace gen */
+} /* namespace gbe */
+
--- /dev/null
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+/**
+ * \file program.h
+ * \author Benjamin Segovia <benjamin.segovia@intel.com>
+ *
+ * C-like interface for the gen kernels and programs
+ */
+
+#ifndef __GBE_GEN_PROGRAM_H__
+#define __GBE_GEN_PROGRAM_H__
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*! Opaque structure that interfaces a Gen program */
+typedef struct GenProgram GenProgram;
+
+/*! Opaque structure that interfaces a Gen kernel (ie one OCL function) */
+typedef struct GenKernel GenKernel;
+
+/*! Argument type for each function call */
+enum GenArgType {
+ GEN_ARG_VALUE = 0, // int, float and so on
+ GEN_ARG_GLOBAL_PTR = 1, // __global, __constant
+ GEN_ARG_STRUCTURE = 2, // By value structure
+ GEN_ARG_IMAGE = 3, // image2d_t, image3d_t
+ GEN_ARG_INVALUE = 0xffffffff
+};
+
+/*! Create a new program from the given source code (zero terminated string) */
+GenProgram *GenProgramNewFromSource(const char *source);
+
+/*! Create a new program from the given blob */
+GenProgram *GenProgramNewFromBinary(const char *binary, size_t size);
+
+/*! Destroy and deallocate the given program */
+void GenProgramDelete(GenProgram *program);
+
+/*! Get the number of functions in the program */
+uint32_t GenProgramGetKernelNum(const GenProgram *program);
+
+/*! Get the kernel from its name */
+const GenKernel GenProgramGetKernel(const GenProgram *program, const char *name);
+
+/*! Get the Gen ISA source code */
+const char *GenKernelGetCode(const GenKernel *kernel);
+
+/*! Get the size of the source code */
+const size_t GenKernelGetCodeSize(const GenKernel *kernel);
+
+/*! Get the total number of arguments */
+uint32_t GenKernelGetArgNum(const GenKernel *kernel);
+
+/*! Get the size of the given argument */
+uint32_t GenKernelGetArgSize(const GenKernel *kernel, uint32_t argID);
+
+/*! Get the type of the given argument */
+GenArgType GenKernelGetArgType(const GenKernel *kernel, uint32_t argID);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __GBE_GEN_PROGRAM_H__ */
+
--- /dev/null
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+/**
+ * \file program.hpp
+ * \author Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+#ifndef __GBE_GEN_PROGRAM_HPP__
+#define __GBE_GEN_PROGRAM_HPP__
+
+#include "gen/brw_structs.h"
+#include "sys/hash_map.hpp"
+#include <string>
+
+namespace gbe {
+namespace ir {
+
+ class Unit; // Compilation unit. Contains the program to compile
+ class Liveness; // Describes liveness of each ir function register
+ class FunctionDAG; // Describes the instruction dependencies
+
+} /* namespace ir */
+} /* namespace gbe */
+
+namespace gbe {
+namespace gen {
+
+ struct KernelArgument
+ {
+ GenArgType type; //!< Pointer, structure, regular value?
+ size_t size; //!< Size of each argument
+ };
+
+ /*! Describe a compiled kernel */
+ struct Kernel : public NonCopyable
+ {
+ /*! Create an empty kernel with the given name */
+ Kernel(void);
+ /*! Destroy it */
+ ~Kernel(void);
+
+ std::string name; //!< Kernel name
+ KernelArgument *args; //!< Each argument
+ brw_instruction *insns; //!< Instruction stream
+ uint32_t argNum; //!< Number of function arguments
+ uint32_t insnNum; //!< Number of instructions
+ ir::Liveness *liveness; //!< Used only for the build
+ ir::FunctionDAG *dag; //!< Used only for the build
+ GBE_STRUCT(Kernel); //!< Use gbe allocators
+ };
+
+ /*! Describe a compiled program */
+ struct Program : public NonCopyable
+ {
+ /*! Create an empty program */
+ Program(void);
+ /*! Destroy the program */
+ ~Program(void);
+ /*! Build a program from a ir::Unit */
+ bool buildFromUnit(const ir::Unit &unit, std::string &error);
+ /*! Buils a program from a LLVM source code */
+ bool buildFromLLVMFile(const char *fileName, std::string &error);
+ /*! Buils a program from a OCL string */
+ bool buildFromSource(const char *source, std::string &error);
+ /*! Kernels sorted by their name */
+ hash_map<std::string, Kernel*> kernels;
+ };
+
+} /* namespace gen */
+} /* namespace gbe */
+
+#endif /* __GBE_GEN_PROGRAM_HPP__ */
+
namespace gbe
{
- void llvmToGen(ir::Unit &unit, const char *fileName)
+ bool llvmToGen(ir::Unit &unit, const char *fileName)
{
using namespace llvm;
// Get the global LLVM context
SMDiagnostic Err;
std::auto_ptr<Module> M;
M.reset(ParseIRFile(fileName, Err, c));
- GBE_ASSERT (M.get() != 0);
+ if (M.get() == 0) return false;
Module &mod = *M.get();
llvm::PassManager passes;
passes.add(createGVNPass()); // Remove redundancies
passes.add(createGenPass(unit));
passes.run(mod);
+ return true;
}
} /* namespace gbe */
} /* namespace ir */
/*! Convert the LLVM IR code to a GEN IR code */
- void llvmToGen(ir::Unit &unit, const char *fileName);
+ bool llvmToGen(ir::Unit &unit, const char *fileName);
} /* namespace gbe */