From f35181d840683e0f3e683a296b3c287cd5e3ba5c Mon Sep 17 00:00:00 2001 From: Benjamin Segovia Date: Tue, 20 Mar 2012 20:57:42 -0700 Subject: [PATCH] Started to implement Gen code generation --- backend/CMakeLists.txt | 11 +- backend/kernels/Makefile | 34 + backend/src/CMakeLists.txt | 5 +- backend/src/gen/brw_chipset.h | 191 ++++ backend/src/gen/brw_defines.h | 1499 +++++++++++++++++++++++++++++ backend/src/gen/brw_disasm.c | 6 +- backend/src/gen/brw_eu.c | 34 +- backend/src/gen/brw_eu.h | 907 ++++++++---------- backend/src/gen/brw_eu_emit.c | 1946 +++++++++----------------------------- backend/src/gen/brw_structs.h | 758 +++++++++++++++ backend/src/gen/program.cpp | 70 ++ backend/src/gen/program.h | 87 ++ backend/src/gen/program.hpp | 90 ++ backend/src/llvm/llvm_to_gen.cpp | 5 +- backend/src/llvm/llvm_to_gen.hpp | 2 +- 15 files changed, 3635 insertions(+), 2010 deletions(-) create mode 100644 backend/kernels/Makefile create mode 100644 backend/src/gen/brw_chipset.h create mode 100644 backend/src/gen/brw_defines.h create mode 100644 backend/src/gen/brw_structs.h create mode 100644 backend/src/gen/program.cpp create mode 100644 backend/src/gen/program.h create mode 100644 backend/src/gen/program.hpp diff --git a/backend/CMakeLists.txt b/backend/CMakeLists.txt index cea4e58..75d6c5f 100644 --- a/backend/CMakeLists.txt +++ b/backend/CMakeLists.txt @@ -53,7 +53,8 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux") # set (VISIBILITY_FLAG "-fvisibility=hidden") if (COMPILER STREQUAL "GCC") - set (CMAKE_CXX_FLAGS "-Wstrict-aliasing=2 -Wno-invalid-offsetof -fstrict-aliasing -msse2 -ffast-math -fPIC -Wall -fno-rtti -std=c++0x") + set (CMAKE_C_CXX_FLAGS "-Wstrict-aliasing=2 -fstrict-aliasing -msse2 -ffast-math -fPIC -Wall") + set (CMAKE_CXX_FLAGS "${CMAKE_C_CXX_FLAGS} -Wno-invalid-offsetof -fno-rtti -std=c++0x") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}") set (CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} ${VISIBILITY_FLAG} -Wl,-E") @@ -64,6 +65,14 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux") set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1") set (CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG -DGBE_DEBUG=0") set (CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -DGBE_DEBUG=0") + set (CMAKE_C_FLAGS "${CMAKE_C_CXX_FLAGS}") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GBE_DEBUG_MEMORY_FLAG}") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GBE_COMPILE_UTESTS_FLAG}") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${VISIBILITY_FLAG} -Wl,-E") + set (CMAKE_C_FLAGS_DEBUG "-g -DGBE_DEBUG=1") + set (CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1") + set (CMAKE_C_FLAGS_MINSIZEREL "-Os -DNDEBUG -DGBE_DEBUG=0") + set (CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG -DGBE_DEBUG=0") elseif (COMPILER STREQUAL "CLANG") set (CMAKE_C_COMPILER "clang") set (CMAKE_C_FLAGS "-Wall -std=c99") diff --git a/backend/kernels/Makefile b/backend/kernels/Makefile new file mode 100644 index 0000000..c5a0ebd --- /dev/null +++ b/backend/kernels/Makefile @@ -0,0 +1,34 @@ +%.ll : %.cl Makefile stdlib.h + ./compile.sh $< + +all: add.ll\ + add2.ll\ + cmp.ll\ + cmp_cvt.ll\ + complex_struct.ll\ + cycle.ll\ + extract.ll\ + function.ll\ + function_param.ll\ + get_global_id.ll\ + insert.ll\ + load_store.ll\ + loop.ll\ + loop2.ll\ + loop3.ll\ + loop4.ll\ + loop5.ll\ + select.ll\ + short.ll\ + shuffle.ll\ + simple_float4.ll\ + simple_float4_2.ll\ + simple_float4_3.ll\ + store.ll\ + struct.ll\ + struct2.ll\ + test_select.ll\ + undefined.ll\ + vector_constant.ll\ + void.ll + diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index 0560191..69b542a 100644 --- a/backend/src/CMakeLists.txt +++ b/backend/src/CMakeLists.txt @@ -42,7 +42,10 @@ else (GBE_USE_BLOB) ir/function.hpp ir/value.cpp ir/value.hpp - gen/brw_disasm.c) + gen/program.cpp + gen/brw_disasm.c + gen/brw_eu_emit.c + gen/brw_eu.c) if (GBE_COMPILE_UTESTS) set (GBE_SRC diff --git a/backend/src/gen/brw_chipset.h b/backend/src/gen/brw_chipset.h new file 
mode 100644 index 0000000..c2a06bb --- /dev/null +++ b/backend/src/gen/brw_chipset.h @@ -0,0 +1,191 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Benjamin Segovia + */ + + /* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +#define PCI_CHIP_I810 0x7121 +#define PCI_CHIP_I810_DC100 0x7123 +#define PCI_CHIP_I810_E 0x7125 +#define PCI_CHIP_I815 0x1132 + +#define PCI_CHIP_I830_M 0x3577 +#define PCI_CHIP_845_G 0x2562 +#define PCI_CHIP_I855_GM 0x3582 +#define PCI_CHIP_I865_G 0x2572 + +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_E7221_G 0x258A +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE + +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q33_G 0x29D2 + +#define PCI_CHIP_IGD_GM 0xA011 +#define PCI_CHIP_IGD_G 0xA001 + +#define IS_IGDGM(devid) ((devid) == PCI_CHIP_IGD_GM) /* devid is parenthesized in every IS_* test so an expression argument is compared as a whole */ +#define IS_IGDG(devid) ((devid) == PCI_CHIP_IGD_G) +#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid)) + +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_G_1 0x2982 +#define PCI_CHIP_I946_GZ 0x2972 +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GME 0x2A12 + +#define PCI_CHIP_GM45_GM 0x2A42 + +#define PCI_CHIP_IGD_E_G 0x2E02 +#define PCI_CHIP_Q45_G 0x2E12 +#define PCI_CHIP_G45_G 0x2E22 +#define PCI_CHIP_G41_G 0x2E32 +#define PCI_CHIP_B43_G 0x2E42 +#define PCI_CHIP_B43_G1 0x2E92 + +#define PCI_CHIP_ILD_G 0x0042 +#define PCI_CHIP_ILM_G 0x0046 + +#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* Desktop */ +#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112 +#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122 +#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* Mobile */ +#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116 +#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126 +#define PCI_CHIP_SANDYBRIDGE_S 0x010A /* Server */ + +#define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* Desktop */ +#define 
PCI_CHIP_IVYBRIDGE_GT2 0x0162 +#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* Mobile */ +#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166 +#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */ + +#define IS_MOBILE(devid) ((devid) == PCI_CHIP_I855_GM || \ + (devid) == PCI_CHIP_I915_GM || \ + (devid) == PCI_CHIP_I945_GM || \ + (devid) == PCI_CHIP_I945_GME || \ + (devid) == PCI_CHIP_I965_GM || \ + (devid) == PCI_CHIP_I965_GME || \ + (devid) == PCI_CHIP_GM45_GM || \ + IS_IGD(devid) || \ + (devid) == PCI_CHIP_ILM_G) + +#define IS_G45(devid) ((devid) == PCI_CHIP_IGD_E_G || \ + (devid) == PCI_CHIP_Q45_G || \ + (devid) == PCI_CHIP_G45_G || \ + (devid) == PCI_CHIP_G41_G || \ + (devid) == PCI_CHIP_B43_G || \ + (devid) == PCI_CHIP_B43_G1) +#define IS_GM45(devid) ((devid) == PCI_CHIP_GM45_GM) +#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) + +#define IS_ILD(devid) ((devid) == PCI_CHIP_ILD_G) +#define IS_ILM(devid) ((devid) == PCI_CHIP_ILM_G) +#define IS_GEN5(devid) (IS_ILD(devid) || IS_ILM(devid)) + +#define IS_915(devid) ((devid) == PCI_CHIP_I915_G || \ + (devid) == PCI_CHIP_E7221_G || \ + (devid) == PCI_CHIP_I915_GM) + +#define IS_945(devid) ((devid) == PCI_CHIP_I945_G || \ + (devid) == PCI_CHIP_I945_GM || \ + (devid) == PCI_CHIP_I945_GME || \ + (devid) == PCI_CHIP_G33_G || \ + (devid) == PCI_CHIP_Q33_G || \ + (devid) == PCI_CHIP_Q35_G || IS_IGD(devid)) + +#define IS_GEN4(devid) ((devid) == PCI_CHIP_I965_G || \ + (devid) == PCI_CHIP_I965_Q || \ + (devid) == PCI_CHIP_I965_G_1 || \ + (devid) == PCI_CHIP_I965_GM || \ + (devid) == PCI_CHIP_I965_GME || \ + (devid) == PCI_CHIP_I946_GZ || \ + IS_G4X(devid)) + +/* Compat macro for intel_decode.c */ +#define IS_IRONLAKE(devid) IS_GEN5(devid) + +#define IS_SNB_GT1(devid) ((devid) == PCI_CHIP_SANDYBRIDGE_GT1 || \ + (devid) == PCI_CHIP_SANDYBRIDGE_M_GT1 || \ + (devid) == PCI_CHIP_SANDYBRIDGE_S) + +#define IS_SNB_GT2(devid) ((devid) == PCI_CHIP_SANDYBRIDGE_GT2 || \ + (devid) == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \ + (devid) == PCI_CHIP_SANDYBRIDGE_M_GT2 || \ + (devid) == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS) + +#define 
IS_GEN6(devid) (IS_SNB_GT1(devid) || IS_SNB_GT2(devid)) + +#define IS_IVB_GT1(devid) ((devid) == PCI_CHIP_IVYBRIDGE_GT1 || \ + (devid) == PCI_CHIP_IVYBRIDGE_M_GT1 || \ + (devid) == PCI_CHIP_IVYBRIDGE_S_GT1) + +#define IS_IVB_GT2(devid) ((devid) == PCI_CHIP_IVYBRIDGE_GT2 || \ + (devid) == PCI_CHIP_IVYBRIDGE_M_GT2) + +#define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || IS_IVB_GT2(devid)) + +#define IS_GEN7(devid) IS_IVYBRIDGE(devid) + +#define IS_965(devid) (IS_GEN4(devid) || \ + IS_G4X(devid) || \ + IS_GEN5(devid) || \ + IS_GEN6(devid) || \ + IS_GEN7(devid)) + +#define IS_9XX(devid) (IS_915(devid) || \ + IS_945(devid) || \ + IS_965(devid)) + +#define IS_GEN3(devid) (IS_915(devid) || \ + IS_945(devid)) + +#define IS_GEN2(devid) ((devid) == PCI_CHIP_I830_M || \ + (devid) == PCI_CHIP_845_G || \ + (devid) == PCI_CHIP_I855_GM || \ + (devid) == PCI_CHIP_I865_G) diff --git a/backend/src/gen/brw_defines.h b/backend/src/gen/brw_defines.h new file mode 100644 index 0000000..e991a84 --- /dev/null +++ b/backend/src/gen/brw_defines.h @@ -0,0 +1,1499 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
+ * + * Author: Benjamin Segovia + */ + + /* + * Authors: + * Keith Whitwell + */ + +#define INTEL_MASK(high, low) (((1u<<((high)-(low)+1))-1)<<(low)) /* bits high..low set; built from 1u so fields that reach bit 31 do not overflow a signed int */ +#define SET_FIELD(value, field) (((value) << field ## _SHIFT) & field ## _MASK) +#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) + +#ifndef BRW_DEFINES_H +#define BRW_DEFINES_H + +/* 3D state: + */ +#define PIPE_CONTROL_NOWRITE 0x00 +#define PIPE_CONTROL_WRITEIMMEDIATE 0x01 +#define PIPE_CONTROL_WRITEDEPTH 0x02 +#define PIPE_CONTROL_WRITETIMESTAMP 0x03 + +#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 +#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 + +#define CMD_3D_PRIM 0x7b00 /* 3DPRIMITIVE */ +/* DW0 */ +# define GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT 10 +# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15) +# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15) +/* DW1 */ +# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) +# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) + +#define _3DPRIM_POINTLIST 0x01 +#define _3DPRIM_LINELIST 0x02 +#define _3DPRIM_LINESTRIP 0x03 +#define _3DPRIM_TRILIST 0x04 +#define _3DPRIM_TRISTRIP 0x05 +#define _3DPRIM_TRIFAN 0x06 +#define _3DPRIM_QUADLIST 0x07 +#define _3DPRIM_QUADSTRIP 0x08 +#define _3DPRIM_LINELIST_ADJ 0x09 +#define _3DPRIM_LINESTRIP_ADJ 0x0A +#define _3DPRIM_TRILIST_ADJ 0x0B +#define _3DPRIM_TRISTRIP_ADJ 0x0C +#define _3DPRIM_TRISTRIP_REVERSE 0x0D +#define _3DPRIM_POLYGON 0x0E +#define _3DPRIM_RECTLIST 0x0F +#define _3DPRIM_LINELOOP 0x10 +#define _3DPRIM_POINTLIST_BF 0x11 +#define _3DPRIM_LINESTRIP_CONT 0x12 +#define _3DPRIM_LINESTRIP_BF 0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 + +#define BRW_ANISORATIO_2 0 +#define BRW_ANISORATIO_4 1 +#define BRW_ANISORATIO_6 2 +#define BRW_ANISORATIO_8 3 +#define BRW_ANISORATIO_10 4 +#define BRW_ANISORATIO_12 5 +#define BRW_ANISORATIO_14 6 +#define BRW_ANISORATIO_16 7 + +#define BRW_BLENDFACTOR_ONE 0x1 +#define BRW_BLENDFACTOR_SRC_COLOR 0x2 
+#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 +#define BRW_BLENDFACTOR_DST_ALPHA 0x4 +#define BRW_BLENDFACTOR_DST_COLOR 0x5 +#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define BRW_BLENDFACTOR_CONST_COLOR 0x7 +#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 +#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 +#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A +#define BRW_BLENDFACTOR_ZERO 0x11 +#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 +#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 +#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define BRW_BLENDFUNCTION_ADD 0 +#define BRW_BLENDFUNCTION_SUBTRACT 1 +#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BRW_BLENDFUNCTION_MIN 3 +#define BRW_BLENDFUNCTION_MAX 4 + +#define BRW_ALPHATEST_FORMAT_UNORM8 0 +#define BRW_ALPHATEST_FORMAT_FLOAT32 1 + +#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define BRW_CHROMAKEY_REPLACE_BLACK 1 + +#define BRW_CLIP_API_OGL 0 +#define BRW_CLIP_API_DX 1 + +#define BRW_CLIPMODE_NORMAL 0 +#define BRW_CLIPMODE_CLIP_ALL 1 +#define BRW_CLIPMODE_CLIP_NON_REJECTED 2 +#define BRW_CLIPMODE_REJECT_ALL 3 +#define BRW_CLIPMODE_ACCEPT_ALL 4 +#define BRW_CLIPMODE_KERNEL_CLIP 5 + +#define BRW_CLIP_NDCSPACE 0 +#define BRW_CLIP_SCREENSPACE 1 + +#define BRW_COMPAREFUNCTION_ALWAYS 0 +#define BRW_COMPAREFUNCTION_NEVER 1 +#define BRW_COMPAREFUNCTION_LESS 2 +#define BRW_COMPAREFUNCTION_EQUAL 3 +#define BRW_COMPAREFUNCTION_LEQUAL 4 +#define BRW_COMPAREFUNCTION_GREATER 5 +#define BRW_COMPAREFUNCTION_NOTEQUAL 6 +#define BRW_COMPAREFUNCTION_GEQUAL 7 + +#define BRW_COVERAGE_PIXELS_HALF 0 +#define BRW_COVERAGE_PIXELS_1 1 +#define BRW_COVERAGE_PIXELS_2 2 +#define BRW_COVERAGE_PIXELS_4 3 + +#define BRW_CULLMODE_BOTH 0 +#define BRW_CULLMODE_NONE 1 +#define BRW_CULLMODE_FRONT 2 +#define BRW_CULLMODE_BACK 3 + +#define 
BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 +#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 + +#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define BRW_DEPTHFORMAT_D32_FLOAT 1 +#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define BRW_DEPTHFORMAT_D24_UNORM_X8_UINT 3 /* GEN5 */ +#define BRW_DEPTHFORMAT_D16_UNORM 5 + +#define BRW_FLOATING_POINT_IEEE_754 0 +#define BRW_FLOATING_POINT_NON_IEEE_754 1 + +#define BRW_FRONTWINDING_CW 0 +#define BRW_FRONTWINDING_CCW 1 + +#define BRW_SPRITE_POINT_ENABLE 16 + +#define BRW_INDEX_BYTE 0 +#define BRW_INDEX_WORD 1 +#define BRW_INDEX_DWORD 2 + +#define BRW_LOGICOPFUNCTION_CLEAR 0 +#define BRW_LOGICOPFUNCTION_NOR 1 +#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 +#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 +#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 +#define BRW_LOGICOPFUNCTION_INVERT 5 +#define BRW_LOGICOPFUNCTION_XOR 6 +#define BRW_LOGICOPFUNCTION_NAND 7 +#define BRW_LOGICOPFUNCTION_AND 8 +#define BRW_LOGICOPFUNCTION_EQUIV 9 +#define BRW_LOGICOPFUNCTION_NOOP 10 +#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 +#define BRW_LOGICOPFUNCTION_COPY 12 +#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 +#define BRW_LOGICOPFUNCTION_OR 14 +#define BRW_LOGICOPFUNCTION_SET 15 + +#define BRW_MAPFILTER_NEAREST 0x0 +#define BRW_MAPFILTER_LINEAR 0x1 +#define BRW_MAPFILTER_ANISOTROPIC 0x2 + +#define BRW_MIPFILTER_NONE 0 +#define BRW_MIPFILTER_NEAREST 1 +#define BRW_MIPFILTER_LINEAR 3 + +#define BRW_ADDRESS_ROUNDING_ENABLE_U_MAG 0x20 +#define BRW_ADDRESS_ROUNDING_ENABLE_U_MIN 0x10 +#define BRW_ADDRESS_ROUNDING_ENABLE_V_MAG 0x08 +#define BRW_ADDRESS_ROUNDING_ENABLE_V_MIN 0x04 +#define BRW_ADDRESS_ROUNDING_ENABLE_R_MAG 0x02 +#define BRW_ADDRESS_ROUNDING_ENABLE_R_MIN 0x01 + +#define BRW_POLYGON_FRONT_FACING 0 +#define BRW_POLYGON_BACK_FACING 1 + +#define BRW_PREFILTER_ALWAYS 0x0 +#define BRW_PREFILTER_NEVER 0x1 +#define BRW_PREFILTER_LESS 0x2 +#define BRW_PREFILTER_EQUAL 0x3 +#define BRW_PREFILTER_LEQUAL 0x4 +#define BRW_PREFILTER_GREATER 0x5 +#define BRW_PREFILTER_NOTEQUAL 
0x6 +#define BRW_PREFILTER_GEQUAL 0x7 + +#define BRW_PROVOKING_VERTEX_0 0 +#define BRW_PROVOKING_VERTEX_1 1 +#define BRW_PROVOKING_VERTEX_2 2 + +#define BRW_RASTRULE_UPPER_LEFT 0 +#define BRW_RASTRULE_UPPER_RIGHT 1 +/* These are listed as "Reserved, but not seen as useful" + * in Intel documentation (page 212, "Point Rasterization Rule", + * section 7.4 "SF Pipeline State Summary", of document + * "Intel® 965 Express Chipset Family and Intel® G35 Express + * Chipset Graphics Controller Programmer's Reference Manual, + * Volume 2: 3D/Media", Revision 1.0b as of January 2008, + * available at + * http://intellinuxgraphics.org/documentation.html + * at the time of this writing). + * + * These appear to be supported on at least some + * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT + * is useful when using OpenGL to render to a FBO + * (which has the pixel coordinate Y orientation inverted + * with respect to the normal OpenGL pixel coordinate system). + */ +#define BRW_RASTRULE_LOWER_LEFT 2 +#define BRW_RASTRULE_LOWER_RIGHT 3 + +#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 +#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 +#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 + +#define BRW_STENCILOP_KEEP 0 +#define BRW_STENCILOP_ZERO 1 +#define BRW_STENCILOP_REPLACE 2 +#define BRW_STENCILOP_INCRSAT 3 +#define BRW_STENCILOP_DECRSAT 4 +#define BRW_STENCILOP_INCR 5 +#define BRW_STENCILOP_DECR 6 +#define BRW_STENCILOP_INVERT 7 + +/* Surface state DW0 */ +#define BRW_SURFACE_RC_READ_WRITE (1 << 8) +#define BRW_SURFACE_MIPLAYOUT_SHIFT 10 +#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 +#define BRW_SURFACE_CUBEFACE_ENABLES 0x3f +#define BRW_SURFACE_BLEND_ENABLED (1 << 13) +#define BRW_SURFACE_WRITEDISABLE_B_SHIFT 14 +#define BRW_SURFACE_WRITEDISABLE_G_SHIFT 15 +#define BRW_SURFACE_WRITEDISABLE_R_SHIFT 16 +#define BRW_SURFACE_WRITEDISABLE_A_SHIFT 17 + +#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 
0x001 +#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005 +#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041 +#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042 +#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082 +#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085 +#define BRW_SURFACEFORMAT_R32G32_SINT 0x086 +#define BRW_SURFACEFORMAT_R32G32_UINT 0x087 +#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A +#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B +#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C +#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D +#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090 +#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091 +#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092 +#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095 +#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC +#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD +#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE +#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF +#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define BRW_SURFACEFORMAT_R32_SINT 0x0D6 +#define BRW_SURFACEFORMAT_R32_UINT 0x0D7 +#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8 +#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF +#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3 +#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4 +#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1 +#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2 +#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define 
BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8 +#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106 +#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107 +#define BRW_SURFACEFORMAT_R8G8_SINT 0x108 +#define BRW_SURFACEFORMAT_R8G8_UINT 0x109 +#define BRW_SURFACEFORMAT_R16_UNORM 0x10A +#define BRW_SURFACEFORMAT_R16_SNORM 0x10B +#define BRW_SURFACEFORMAT_R16_SINT 0x10C +#define BRW_SURFACEFORMAT_R16_UINT 0x10D +#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E +#define BRW_SURFACEFORMAT_I16_UNORM 0x111 +#define BRW_SURFACEFORMAT_L16_UNORM 0x112 +#define BRW_SURFACEFORMAT_A16_UNORM 0x113 +#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 +#define BRW_SURFACEFORMAT_I16_FLOAT 0x115 +#define BRW_SURFACEFORMAT_L16_FLOAT 0x116 +#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 +#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C +#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D +#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E +#define BRW_SURFACEFORMAT_R16_USCALED 0x11F +#define BRW_SURFACEFORMAT_R8_UNORM 0x140 +#define BRW_SURFACEFORMAT_R8_SNORM 0x141 +#define BRW_SURFACEFORMAT_R8_SINT 0x142 +#define BRW_SURFACEFORMAT_R8_UINT 0x143 +#define BRW_SURFACEFORMAT_A8_UNORM 0x144 +#define BRW_SURFACEFORMAT_I8_UNORM 0x145 +#define BRW_SURFACEFORMAT_L8_UNORM 0x146 +#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147 +#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 +#define BRW_SURFACEFORMAT_R8_SSCALED 0x149 +#define BRW_SURFACEFORMAT_R8_USCALED 0x14A +#define 
BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C +#define BRW_SURFACEFORMAT_DXT1_RGB_SRGB 0x180 +#define BRW_SURFACEFORMAT_R1_UINT 0x181 +#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define BRW_SURFACEFORMAT_BC1_UNORM 0x186 +#define BRW_SURFACEFORMAT_BC2_UNORM 0x187 +#define BRW_SURFACEFORMAT_BC3_UNORM 0x188 +#define BRW_SURFACEFORMAT_BC4_UNORM 0x189 +#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A +#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define BRW_SURFACEFORMAT_MONO8 0x18E +#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define BRW_SURFACEFORMAT_DXT1_RGB 0x191 +#define BRW_SURFACEFORMAT_FXT1 0x192 +#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193 +#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define BRW_SURFACEFORMAT_BC4_SNORM 0x199 +#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A +#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F +#define BRW_SURFACE_FORMAT_SHIFT 18 +#define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18) + +#define BRW_SURFACERETURNFORMAT_FLOAT32 0 +#define BRW_SURFACERETURNFORMAT_S1 1 + +#define BRW_SURFACE_TYPE_SHIFT 29 +#define BRW_SURFACE_TYPE_MASK INTEL_MASK(31, 29) +#define BRW_SURFACE_1D 0 +#define BRW_SURFACE_2D 1 +#define BRW_SURFACE_3D 2 +#define BRW_SURFACE_CUBE 3 +#define BRW_SURFACE_BUFFER 4 +#define BRW_SURFACE_NULL 7 + +/* Surface state DW2 */ +#define BRW_SURFACE_HEIGHT_SHIFT 19 +#define BRW_SURFACE_HEIGHT_MASK INTEL_MASK(31, 19) +#define BRW_SURFACE_WIDTH_SHIFT 6 +#define BRW_SURFACE_WIDTH_MASK INTEL_MASK(18, 6) +#define 
BRW_SURFACE_LOD_SHIFT 2 +#define BRW_SURFACE_LOD_MASK INTEL_MASK(5, 2) + +/* Surface state DW3 */ +#define BRW_SURFACE_DEPTH_SHIFT 21 +#define BRW_SURFACE_DEPTH_MASK INTEL_MASK(31, 21) +#define BRW_SURFACE_PITCH_SHIFT 3 +#define BRW_SURFACE_PITCH_MASK INTEL_MASK(19, 3) +#define BRW_SURFACE_TILED (1 << 1) +#define BRW_SURFACE_TILED_Y (1 << 0) + +/* Surface state DW4 */ +#define BRW_SURFACE_MIN_LOD_SHIFT 28 +#define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28) + +/* Surface state DW5 */ +#define BRW_SURFACE_X_OFFSET_SHIFT 25 +#define BRW_SURFACE_X_OFFSET_MASK INTEL_MASK(31, 25) +#define BRW_SURFACE_VERTICAL_ALIGN_ENABLE (1 << 24) +#define BRW_SURFACE_Y_OFFSET_SHIFT 20 +#define BRW_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 20) + +#define BRW_TEXCOORDMODE_WRAP 0 +#define BRW_TEXCOORDMODE_MIRROR 1 +#define BRW_TEXCOORDMODE_CLAMP 2 +#define BRW_TEXCOORDMODE_CUBE 3 +#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 +#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 + +#define BRW_THREAD_PRIORITY_NORMAL 0 +#define BRW_THREAD_PRIORITY_HIGH 1 + +#define BRW_TILEWALK_XMAJOR 0 +#define BRW_TILEWALK_YMAJOR 1 + +#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +/* Execution Unit (EU) defines + */ + +#define BRW_ALIGN_1 0 +#define BRW_ALIGN_16 1 + +#define BRW_ADDRESS_DIRECT 0 +#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define BRW_CHANNEL_X 0 +#define BRW_CHANNEL_Y 1 +#define BRW_CHANNEL_Z 2 +#define BRW_CHANNEL_W 3 + +enum brw_compression { + BRW_COMPRESSION_NONE = 0, + BRW_COMPRESSION_2NDHALF = 1, + BRW_COMPRESSION_COMPRESSED = 2, +}; + +#define GEN6_COMPRESSION_1Q 0 +#define GEN6_COMPRESSION_2Q 1 +#define GEN6_COMPRESSION_3Q 2 +#define GEN6_COMPRESSION_4Q 3 +#define GEN6_COMPRESSION_1H 0 +#define GEN6_COMPRESSION_2H 2 + +#define BRW_CONDITIONAL_NONE 0 +#define BRW_CONDITIONAL_Z 1 +#define BRW_CONDITIONAL_NZ 2 +#define BRW_CONDITIONAL_EQ 1 /* Z */ +#define BRW_CONDITIONAL_NEQ 2 /* NZ */ +#define BRW_CONDITIONAL_G 3 +#define BRW_CONDITIONAL_GE 4 
+#define BRW_CONDITIONAL_L 5 +#define BRW_CONDITIONAL_LE 6 +#define BRW_CONDITIONAL_R 7 +#define BRW_CONDITIONAL_O 8 +#define BRW_CONDITIONAL_U 9 + +#define BRW_DEBUG_NONE 0 +#define BRW_DEBUG_BREAKPOINT 1 + +#define BRW_DEPENDENCY_NORMAL 0 +#define BRW_DEPENDENCY_NOTCLEARED 1 +#define BRW_DEPENDENCY_NOTCHECKED 2 +#define BRW_DEPENDENCY_DISABLE 3 + +#define BRW_EXECUTE_1 0 +#define BRW_EXECUTE_2 1 +#define BRW_EXECUTE_4 2 +#define BRW_EXECUTE_8 3 +#define BRW_EXECUTE_16 4 +#define BRW_EXECUTE_32 5 + +#define BRW_HORIZONTAL_STRIDE_0 0 +#define BRW_HORIZONTAL_STRIDE_1 1 +#define BRW_HORIZONTAL_STRIDE_2 2 +#define BRW_HORIZONTAL_STRIDE_4 3 + +#define BRW_INSTRUCTION_NORMAL 0 +#define BRW_INSTRUCTION_SATURATE 1 + +#define BRW_MASK_ENABLE 0 +#define BRW_MASK_DISABLE 1 + +/** @{ + * + * Gen6 has replaced "mask enable/disable" with WECtrl, which is + * effectively the same but much simpler to think about. Now, there + * are two contributors ANDed together to decide whether channels are + * executed: the predication on the instruction, and the channel write + * enable. + */ +/** + * This is the default value. It means that a channel's write enable is set + * if the per-channel IP is pointing at this instruction. + */ +#define BRW_WE_NORMAL 0 +/** + * This is used like BRW_MASK_DISABLE, and causes all channels to have + * their write enable set. Note that predication still contributes to + * whether the channel actually gets written. + */ +#define BRW_WE_ALL 1 +/** @} */ + +enum opcode { /* Instruction opcodes: hardware encodings first, then backend-only pseudo opcodes. */ + /* These are the actual hardware opcodes.
*/ + BRW_OPCODE_MOV = 1, + BRW_OPCODE_SEL = 2, + BRW_OPCODE_NOT = 4, + BRW_OPCODE_AND = 5, + BRW_OPCODE_OR = 6, + BRW_OPCODE_XOR = 7, + BRW_OPCODE_SHR = 8, + BRW_OPCODE_SHL = 9, + BRW_OPCODE_RSR = 10, + BRW_OPCODE_RSL = 11, + BRW_OPCODE_ASR = 12, + BRW_OPCODE_CMP = 16, + BRW_OPCODE_CMPN = 17, + BRW_OPCODE_JMPI = 32, + BRW_OPCODE_IF = 34, + BRW_OPCODE_IFF = 35, + BRW_OPCODE_ELSE = 36, + BRW_OPCODE_ENDIF = 37, + BRW_OPCODE_DO = 38, + BRW_OPCODE_WHILE = 39, + BRW_OPCODE_BREAK = 40, + BRW_OPCODE_CONTINUE = 41, + BRW_OPCODE_HALT = 42, + BRW_OPCODE_MSAVE = 44, + BRW_OPCODE_MRESTORE = 45, + BRW_OPCODE_PUSH = 46, + BRW_OPCODE_POP = 47, + BRW_OPCODE_WAIT = 48, + BRW_OPCODE_SEND = 49, + BRW_OPCODE_SENDC = 50, + BRW_OPCODE_MATH = 56, + BRW_OPCODE_ADD = 64, + BRW_OPCODE_MUL = 65, + BRW_OPCODE_AVG = 66, + BRW_OPCODE_FRC = 67, + BRW_OPCODE_RNDU = 68, + BRW_OPCODE_RNDD = 69, + BRW_OPCODE_RNDE = 70, + BRW_OPCODE_RNDZ = 71, + BRW_OPCODE_MAC = 72, + BRW_OPCODE_MACH = 73, + BRW_OPCODE_LZD = 74, + BRW_OPCODE_SAD2 = 80, + BRW_OPCODE_SADA2 = 81, + BRW_OPCODE_DP4 = 84, + BRW_OPCODE_DPH = 85, + BRW_OPCODE_DP3 = 86, + BRW_OPCODE_DP2 = 87, + BRW_OPCODE_DPA2 = 88, + BRW_OPCODE_LINE = 89, + BRW_OPCODE_PLN = 90, + BRW_OPCODE_MAD = 91, + BRW_OPCODE_NOP = 126, + + /* These are compiler backend opcodes that get translated into other + * instructions. They are numbered from 128 upward, above every hardware opcode listed here (the largest is BRW_OPCODE_NOP = 126); the unnumbered entries simply count up from FS_OPCODE_FB_WRITE.
+ */ + FS_OPCODE_FB_WRITE = 128, + SHADER_OPCODE_RCP, + SHADER_OPCODE_RSQ, + SHADER_OPCODE_SQRT, + SHADER_OPCODE_EXP2, + SHADER_OPCODE_LOG2, + SHADER_OPCODE_POW, + SHADER_OPCODE_INT_QUOTIENT, + SHADER_OPCODE_INT_REMAINDER, + SHADER_OPCODE_SIN, + SHADER_OPCODE_COS, + + SHADER_OPCODE_TEX, + SHADER_OPCODE_TXD, + SHADER_OPCODE_TXF, + SHADER_OPCODE_TXL, + SHADER_OPCODE_TXS, + FS_OPCODE_TXB, + + FS_OPCODE_DDX, + FS_OPCODE_DDY, + FS_OPCODE_PIXEL_X, + FS_OPCODE_PIXEL_Y, + FS_OPCODE_CINTERP, + FS_OPCODE_LINTERP, + FS_OPCODE_DISCARD, + FS_OPCODE_SPILL, + FS_OPCODE_UNSPILL, + FS_OPCODE_PULL_CONSTANT_LOAD, + + VS_OPCODE_URB_WRITE, + VS_OPCODE_SCRATCH_READ, + VS_OPCODE_SCRATCH_WRITE, + VS_OPCODE_PULL_CONSTANT_LOAD, +}; + +#define BRW_PREDICATE_NONE 0 +#define BRW_PREDICATE_NORMAL 1 +#define BRW_PREDICATE_ALIGN1_ANYV 2 +#define BRW_PREDICATE_ALIGN1_ALLV 3 +#define BRW_PREDICATE_ALIGN1_ANY2H 4 +#define BRW_PREDICATE_ALIGN1_ALL2H 5 +#define BRW_PREDICATE_ALIGN1_ANY4H 6 +#define BRW_PREDICATE_ALIGN1_ALL4H 7 +#define BRW_PREDICATE_ALIGN1_ANY8H 8 +#define BRW_PREDICATE_ALIGN1_ALL8H 9 +#define BRW_PREDICATE_ALIGN1_ANY16H 10 +#define BRW_PREDICATE_ALIGN1_ALL16H 11 +#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 /* the ALIGN16 encodings (2..7) reuse the same numeric values as the ALIGN1 ones above */ +#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 +#define BRW_PREDICATE_ALIGN16_ANY4H 6 +#define BRW_PREDICATE_ALIGN16_ALL4H 7 + +#define BRW_ARCHITECTURE_REGISTER_FILE 0 +#define BRW_GENERAL_REGISTER_FILE 1 +#define BRW_MESSAGE_REGISTER_FILE 2 +#define BRW_IMMEDIATE_VALUE 3 + +#define BRW_REGISTER_TYPE_UD 0 +#define BRW_REGISTER_TYPE_D 1 +#define BRW_REGISTER_TYPE_UW 2 +#define BRW_REGISTER_TYPE_W 3 +#define BRW_REGISTER_TYPE_UB 4 +#define BRW_REGISTER_TYPE_B 5 +#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only?
*/ +#define BRW_REGISTER_TYPE_HF 6 +#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define BRW_REGISTER_TYPE_F 7 + +#define BRW_ARF_NULL 0x00 +#define BRW_ARF_ADDRESS 0x10 +#define BRW_ARF_ACCUMULATOR 0x20 +#define BRW_ARF_FLAG 0x30 +#define BRW_ARF_MASK 0x40 +#define BRW_ARF_MASK_STACK 0x50 +#define BRW_ARF_MASK_STACK_DEPTH 0x60 +#define BRW_ARF_STATE 0x70 +#define BRW_ARF_CONTROL 0x80 +#define BRW_ARF_NOTIFICATION_COUNT 0x90 +#define BRW_ARF_IP 0xA0 + +#define BRW_MRF_COMPR4 (1 << 7) + +#define BRW_AMASK 0 +#define BRW_IMASK 1 +#define BRW_LMASK 2 +#define BRW_CMASK 3 + + + +#define BRW_THREAD_NORMAL 0 +#define BRW_THREAD_ATOMIC 1 +#define BRW_THREAD_SWITCH 2 + +#define BRW_VERTICAL_STRIDE_0 0 +#define BRW_VERTICAL_STRIDE_1 1 +#define BRW_VERTICAL_STRIDE_2 2 +#define BRW_VERTICAL_STRIDE_4 3 +#define BRW_VERTICAL_STRIDE_8 4 +#define BRW_VERTICAL_STRIDE_16 5 +#define BRW_VERTICAL_STRIDE_32 6 +#define BRW_VERTICAL_STRIDE_64 7 +#define BRW_VERTICAL_STRIDE_128 8 +#define BRW_VERTICAL_STRIDE_256 9 +#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define BRW_WIDTH_1 0 +#define BRW_WIDTH_2 1 +#define BRW_WIDTH_4 2 +#define BRW_WIDTH_8 3 +#define BRW_WIDTH_16 4 + +#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 +#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 +#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 +#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 +#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 +#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 +#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 +#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 +#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 +#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 +#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 +#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define BRW_POLYGON_FACING_FRONT 0 +#define BRW_POLYGON_FACING_BACK 1 + +/** + * Message target: Shared Function ID for where to SEND a message. + * + * These are enumerated in the ISA reference under "send - Send Message". 
+ * In particular, see the following tables: + * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition" + * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor" + * - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) / + * Overview / GPE Function IDs + */ +enum brw_message_target { + BRW_SFID_NULL = 0, + BRW_SFID_MATH = 1, /* Only valid on Gen4-5 */ + BRW_SFID_SAMPLER = 2, + BRW_SFID_MESSAGE_GATEWAY = 3, + BRW_SFID_DATAPORT_READ = 4, + BRW_SFID_DATAPORT_WRITE = 5, + BRW_SFID_URB = 6, + BRW_SFID_THREAD_SPAWNER = 7, + + GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4, /* same value as BRW_SFID_DATAPORT_READ */ + GEN6_SFID_DATAPORT_RENDER_CACHE = 5, /* same value as BRW_SFID_DATAPORT_WRITE */ + GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9, + + GEN7_SFID_DATAPORT_DATA_CACHE = 10, +}; + +#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10 /* same value as GEN7_SFID_DATAPORT_DATA_CACHE */ + +#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 +#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 /* note: the numeric values in this group repeat across the SIMD4X2/SIMD8/SIMD16 names */ +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE 1 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define GEN5_SAMPLER_MESSAGE_SAMPLE 0 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3 +#define
GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10 + +/* for GEN5 only */ +#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 +#define BRW_SAMPLER_SIMD_MODE_SIMD8 1 +#define BRW_SAMPLER_SIMD_MODE_SIMD16 2 +#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3 + +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +/* This one stays the same across generations. */ +#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +/* GEN4 */ +#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 +/* G45, GEN5 */ +#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 +#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 +#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3 +#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 +#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 +/* GEN6 */ +#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 +#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5 +#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 + +#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define 
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +/* GEN6 */ +#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE 7 +#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8 +#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9 +#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10 +#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11 +#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12 +#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13 +#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14 + +/* GEN7 */ +#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 10 + +#define BRW_MATH_FUNCTION_INV 1 +#define BRW_MATH_FUNCTION_LOG 2 +#define BRW_MATH_FUNCTION_EXP 3 +#define BRW_MATH_FUNCTION_SQRT 4 +#define BRW_MATH_FUNCTION_RSQ 5 +#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ +#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ +#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */ +#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */ +#define BRW_MATH_FUNCTION_POW 10 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define BRW_MATH_INTEGER_UNSIGNED 0 +#define BRW_MATH_INTEGER_SIGNED 1 + +#define BRW_MATH_PRECISION_FULL 0 +#define 
BRW_MATH_PRECISION_PARTIAL 1 + +#define BRW_MATH_SATURATE_NONE 0 +#define BRW_MATH_SATURATE_SATURATE 1 + +#define BRW_MATH_DATA_VECTOR 0 +#define BRW_MATH_DATA_SCALAR 1 + +#define BRW_URB_OPCODE_WRITE 0 + +#define BRW_URB_SWIZZLE_NONE 0 +#define BRW_URB_SWIZZLE_INTERLEAVE 1 +#define BRW_URB_SWIZZLE_TRANSPOSE 2 + +#define BRW_SCRATCH_SPACE_SIZE_1K 0 +#define BRW_SCRATCH_SPACE_SIZE_2K 1 +#define BRW_SCRATCH_SPACE_SIZE_4K 2 +#define BRW_SCRATCH_SPACE_SIZE_8K 3 +#define BRW_SCRATCH_SPACE_SIZE_16K 4 +#define BRW_SCRATCH_SPACE_SIZE_32K 5 +#define BRW_SCRATCH_SPACE_SIZE_64K 6 +#define BRW_SCRATCH_SPACE_SIZE_128K 7 +#define BRW_SCRATCH_SPACE_SIZE_256K 8 +#define BRW_SCRATCH_SPACE_SIZE_512K 9 +#define BRW_SCRATCH_SPACE_SIZE_1M 10 +#define BRW_SCRATCH_SPACE_SIZE_2M 11 + + + + +#define CMD_URB_FENCE 0x6000 +#define CMD_CS_URB_STATE 0x6001 +#define CMD_CONST_BUFFER 0x6002 + +#define CMD_STATE_BASE_ADDRESS 0x6101 +#define CMD_STATE_SIP 0x6102 +#define CMD_PIPELINE_SELECT_965 0x6104 +#define CMD_PIPELINE_SELECT_GM45 0x6904 + +#define _3DSTATE_PIPELINED_POINTERS 0x7800 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x7801 +# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) +# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) +# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12) + +#define _3DSTATE_BINDING_TABLE_POINTERS_VS 0x7826 /* GEN7+ */ +#define _3DSTATE_BINDING_TABLE_POINTERS_HS 0x7827 /* GEN7+ */ +#define _3DSTATE_BINDING_TABLE_POINTERS_DS 0x7828 /* GEN7+ */ +#define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GEN7+ */ +#define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GEN7+ */ + +#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */ +# define PS_SAMPLER_STATE_CHANGE (1 << 12) +# define GS_SAMPLER_STATE_CHANGE (1 << 9) +# define VS_SAMPLER_STATE_CHANGE (1 << 8) +/* DW1: VS */ +/* DW2: GS */ +/* DW3: PS */ + +#define _3DSTATE_SAMPLER_STATE_POINTERS_VS 0x782B /* GEN7+ */ +#define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782E /* GEN7+ */ +#define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782F /* 
GEN7+ */ + +#define _3DSTATE_VERTEX_BUFFERS 0x7808 +# define BRW_VB0_INDEX_SHIFT 27 +# define GEN6_VB0_INDEX_SHIFT 26 +# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) +# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) +# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20) +# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20) +# define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14) +# define BRW_VB0_PITCH_SHIFT 0 + +#define _3DSTATE_VERTEX_ELEMENTS 0x7809 +# define BRW_VE0_INDEX_SHIFT 27 +# define GEN6_VE0_INDEX_SHIFT 26 +# define BRW_VE0_FORMAT_SHIFT 16 +# define BRW_VE0_VALID (1 << 26) +# define GEN6_VE0_VALID (1 << 25) +# define BRW_VE0_SRC_OFFSET_SHIFT 0 +# define BRW_VE1_COMPONENT_NOSTORE 0 +# define BRW_VE1_COMPONENT_STORE_SRC 1 +# define BRW_VE1_COMPONENT_STORE_0 2 +# define BRW_VE1_COMPONENT_STORE_1_FLT 3 +# define BRW_VE1_COMPONENT_STORE_1_INT 4 +# define BRW_VE1_COMPONENT_STORE_VID 5 +# define BRW_VE1_COMPONENT_STORE_IID 6 +# define BRW_VE1_COMPONENT_STORE_PID 7 +# define BRW_VE1_COMPONENT_0_SHIFT 28 +# define BRW_VE1_COMPONENT_1_SHIFT 24 +# define BRW_VE1_COMPONENT_2_SHIFT 20 +# define BRW_VE1_COMPONENT_3_SHIFT 16 +# define BRW_VE1_DST_OFFSET_SHIFT 0 + +#define CMD_INDEX_BUFFER 0x780a +#define GEN4_3DSTATE_VF_STATISTICS 0x780b +#define GM45_3DSTATE_VF_STATISTICS 0x680b +#define _3DSTATE_CC_STATE_POINTERS 0x780e /* GEN6+ */ +#define _3DSTATE_BLEND_STATE_POINTERS 0x7824 /* GEN7+ */ +#define _3DSTATE_DEPTH_STENCIL_STATE_POINTERS 0x7825 /* GEN7+ */ + +#define _3DSTATE_URB 0x7805 /* GEN6 */ +# define GEN6_URB_VS_SIZE_SHIFT 16 +# define GEN6_URB_VS_ENTRIES_SHIFT 0 +# define GEN6_URB_GS_ENTRIES_SHIFT 8 +# define GEN6_URB_GS_SIZE_SHIFT 0 + +#define _3DSTATE_URB_VS 0x7830 /* GEN7+ */ +#define _3DSTATE_URB_HS 0x7831 /* GEN7+ */ +#define _3DSTATE_URB_DS 0x7832 /* GEN7+ */ +#define _3DSTATE_URB_GS 0x7833 /* GEN7+ */ +# define GEN7_URB_ENTRY_SIZE_SHIFT 16 +# define GEN7_URB_STARTING_ADDRESS_SHIFT 25 + +#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */ +#define 
_3DSTATE_PUSH_CONSTANT_ALLOC_PS 0x7916 /* GEN7+ */ +# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 + +#define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ +# define GEN6_CC_VIEWPORT_MODIFY (1 << 12) +# define GEN6_SF_VIEWPORT_MODIFY (1 << 11) +# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10) + +#define _3DSTATE_VIEWPORT_STATE_POINTERS_CC 0x7823 /* GEN7+ */ +#define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL 0x7821 /* GEN7+ */ + +#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */ + +#define _3DSTATE_VS 0x7810 /* GEN6+ */ +/* DW2 */ +# define GEN6_VS_SPF_MODE (1 << 31) +# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_VS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW4 */ +# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 +# define GEN6_VS_URB_READ_LENGTH_SHIFT 11 +# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW5 */ +# define GEN6_VS_MAX_THREADS_SHIFT 25 +# define GEN6_VS_STATISTICS_ENABLE (1 << 10) +# define GEN6_VS_CACHE_DISABLE (1 << 1) +# define GEN6_VS_ENABLE (1 << 0) + +#define _3DSTATE_GS 0x7811 /* GEN6+ */ +/* DW2 */ +# define GEN6_GS_SPF_MODE (1 << 31) +# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_GS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_GS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_GS_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW4 */ +# define GEN6_GS_URB_READ_LENGTH_SHIFT 11 +# define GEN7_GS_INCLUDE_VERTEX_HANDLES (1 << 10) +# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4 +# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0 +/* DW5 */ +# define GEN6_GS_MAX_THREADS_SHIFT 25 +# define GEN6_GS_STATISTICS_ENABLE (1 << 10) +# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9) +# define GEN6_GS_RENDERING_ENABLE (1 << 8) +# define GEN7_GS_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_GS_REORDER (1 << 30) +# define GEN6_GS_DISCARD_ADJACENCY (1 
<< 29) +# define GEN6_GS_SVBI_PAYLOAD_ENABLE (1 << 28) +# define GEN6_GS_SVBI_POSTINCREMENT_ENABLE (1 << 27) +# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT 16 +# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16) +# define GEN6_GS_ENABLE (1 << 15) + +# define BRW_GS_EDGE_INDICATOR_0 (1 << 8) +# define BRW_GS_EDGE_INDICATOR_1 (1 << 9) + +#define _3DSTATE_HS 0x781B /* GEN7+ */ +#define _3DSTATE_TE 0x781C /* GEN7+ */ +#define _3DSTATE_DS 0x781D /* GEN7+ */ + +#define _3DSTATE_CLIP 0x7812 /* GEN6+ */ +/* DW1 */ +# define GEN7_CLIP_WINDING_CW (0 << 20) +# define GEN7_CLIP_WINDING_CCW (1 << 20) +# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_8 (0 << 19) +# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_4 (1 << 19) +# define GEN7_CLIP_EARLY_CULL (1 << 18) +# define GEN7_CLIP_CULLMODE_BOTH (0 << 16) +# define GEN7_CLIP_CULLMODE_NONE (1 << 16) +# define GEN7_CLIP_CULLMODE_FRONT (2 << 16) +# define GEN7_CLIP_CULLMODE_BACK (3 << 16) +# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10) +/** + * Just does cheap culling based on the clip distance. Bits must be + * disjoint with USER_CLIP_CLIP_DISTANCE bits. + */ +# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0 +/* DW2 */ +# define GEN6_CLIP_ENABLE (1 << 31) +# define GEN6_CLIP_API_OGL (0 << 30) +# define GEN6_CLIP_API_D3D (1 << 30) +# define GEN6_CLIP_XY_TEST (1 << 28) +# define GEN6_CLIP_Z_TEST (1 << 27) +# define GEN6_CLIP_GB_TEST (1 << 26) +/** 8-bit field of which user clip distances to clip against.
*/ +# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16 +# define GEN6_CLIP_MODE_NORMAL (0 << 13) +# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13) +# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13) +# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9) +# define GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE (1 << 8) +# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4 +# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2 +# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0 +/* DW3 */ +# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17 +# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6 +# define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5) + +#define _3DSTATE_SF 0x7813 /* GEN6+ */ +/* DW1 (for gen6) */ +# define GEN6_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_SF_SWIZZLE_ENABLE (1 << 21) +# define GEN6_SF_POINT_SPRITE_UPPERLEFT (0 << 20) +# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20) +# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11) +# define GEN6_SF_STATISTICS_ENABLE (1 << 10) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7) +# define GEN6_SF_FRONT_SOLID (0 << 5) +# define GEN6_SF_FRONT_WIREFRAME (1 << 5) +# define GEN6_SF_FRONT_POINT (2 << 5) +# define GEN6_SF_BACK_SOLID (0 << 3) +# define GEN6_SF_BACK_WIREFRAME (1 << 3) +# define GEN6_SF_BACK_POINT (2 << 3) +# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1) +# define GEN6_SF_WINDING_CCW (1 << 0) +/* DW3 */ +# define GEN6_SF_LINE_AA_ENABLE (1 << 31) +# define GEN6_SF_CULL_BOTH (0 << 29) +# define GEN6_SF_CULL_NONE (1 << 29) +# define GEN6_SF_CULL_FRONT (2 << 29) +# define GEN6_SF_CULL_BACK (3 << 29) +# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */ +# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16) +# define 
GEN6_SF_SCISSOR_ENABLE (1 << 11) +# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8) +# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8) +# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8) +# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8) +/* DW4 */ +# define GEN6_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_SF_LINE_PROVOKE_SHIFT 27 +# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25 +# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14) +# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14) +# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12) +# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12) +# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11) +# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */ +/* DW5: depth offset constant */ +/* DW6: depth offset scale */ +/* DW7: depth offset clamp */ +/* DW8 */ +# define ATTRIBUTE_1_OVERRIDE_W (1 << 31) +# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30) +# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29) +# define ATTRIBUTE_1_OVERRIDE_X (1 << 28) +# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25 +# define ATTRIBUTE_1_SWIZZLE_SHIFT 22 +# define ATTRIBUTE_1_SOURCE_SHIFT 16 +# define ATTRIBUTE_0_OVERRIDE_W (1 << 15) +# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14) +# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13) +# define ATTRIBUTE_0_OVERRIDE_X (1 << 12) +# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 +# define ATTRIBUTE_0_SWIZZLE_SHIFT 6 +# define ATTRIBUTE_0_SOURCE_SHIFT 0 + +# define ATTRIBUTE_SWIZZLE_INPUTATTR 0 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3 +# define ATTRIBUTE_SWIZZLE_SHIFT 6 + +/* DW16: Point sprite texture coordinate enables */ +/* DW17: Constant interpolation enables */ +/* DW18: attr 0-7 wrap shortest enables */ +/* DW19: attr 8-15 wrap shortest enables */ + +/* On GEN7, many fields of 3DSTATE_SF were split out into a new command: + * 3DSTATE_SBE. The remaining fields live in different DWords, but retain + * the same bit-offset.
The only new field: + */ +/* GEN7/DW1: */ +# define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT 12 + +#define _3DSTATE_SBE 0x781F /* GEN7+ */ +/* DW1 */ +# define GEN7_SBE_SWIZZLE_CONTROL_MODE (1 << 28) +# define GEN7_SBE_NUM_OUTPUTS_SHIFT 22 +# define GEN7_SBE_SWIZZLE_ENABLE (1 << 21) +# define GEN7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20) +# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2-9: Attribute setup (same as DW8-15 of gen6 _3DSTATE_SF) */ +/* DW10: Point sprite texture coordinate enables */ +/* DW11: Constant interpolation enables */ +/* DW12: attr 0-7 wrap shortest enables */ +/* DW13: attr 8-15 wrap shortest enables */ + +enum brw_wm_barycentric_interp_mode { /* value N lines up with bit (GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT + N) of the GEN6_WM_*_BARYCENTRIC flags */ + BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC = 0, + BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC = 1, + BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC = 2, + BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC = 3, + BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC = 4, + BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC = 5, + BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT = 6 /* number of modes above, not a mode itself */ +}; + +#define _3DSTATE_WM 0x7814 /* GEN6+ */ +/* DW1: kernel pointer */ +/* DW2 */ +# define GEN6_WM_SPF_MODE (1 << 31) +# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_WM_SAMPLER_COUNT_SHIFT 27 +# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW3: scratch space */ +/* DW4 */ +# define GEN6_WM_STATISTICS_ENABLE (1 << 31) +# define GEN6_WM_DEPTH_CLEAR (1 << 30) +# define GEN6_WM_DEPTH_RESOLVE (1 << 28) +# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0 +/* DW5 */ +# define GEN6_WM_MAX_THREADS_SHIFT 25 +# define GEN6_WM_KILL_ENABLE (1 << 22) +# define GEN6_WM_COMPUTED_DEPTH (1 << 21) +# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20) +# define GEN6_WM_DISPATCH_ENABLE
(1 << 19) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16) +# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14) +# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14) +# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14) +# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14) +# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13) +# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11) +# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9) +# define GEN6_WM_USES_SOURCE_W (1 << 8) +# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7) +# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2) +# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_WM_POSOFFSET_NONE (0 << 18) +# define GEN6_WM_POSOFFSET_CENTROID (2 << 18) +# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18) +# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16) +# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16) +# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16) +# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) +# define GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 10 +# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9) +# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1) +# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1) +# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1) +# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1) +# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0) +/* DW7: kernel 1 pointer */ +/* DW8: kernel 2 pointer */ + +#define _3DSTATE_CONSTANT_VS 0x7815 /* GEN6+ */ +#define _3DSTATE_CONSTANT_GS 0x7816 /* GEN6+ */ +#define 
_3DSTATE_CONSTANT_PS 0x7817 /* GEN6+ */ +# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15) +# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14) +# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13) +# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12) + +#define _3DSTATE_CONSTANT_HS 0x7819 /* GEN7+ */ +#define _3DSTATE_CONSTANT_DS 0x781A /* GEN7+ */ + +#define _3DSTATE_STREAMOUT 0x781e /* GEN7+ */ +/* DW1 */ +# define SO_FUNCTION_ENABLE (1 << 31) +# define SO_RENDERING_DISABLE (1 << 30) +/* This selects which incoming rendering stream goes down the pipeline. The + * rendering stream is 0 if not defined by special cases in the GS state. + */ +# define SO_RENDER_STREAM_SELECT_SHIFT 27 +# define SO_RENDER_STREAM_SELECT_MASK INTEL_MASK(28, 27) +/* Controls reordering of TRISTRIP_* elements in stream output (not rendering). + */ +# define SO_REORDER_TRAILING (1 << 26) +/* Controls SO_NUM_PRIMS_WRITTEN_* and SO_PRIM_STORAGE_* */ +# define SO_STATISTICS_ENABLE (1 << 25) +# define SO_BUFFER_ENABLE(n) (1 << (8 + (n))) +/* DW2 */ +# define SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT 29 +# define SO_STREAM_3_VERTEX_READ_OFFSET_MASK INTEL_MASK(29, 29) +# define SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT 24 +# define SO_STREAM_3_VERTEX_READ_LENGTH_MASK INTEL_MASK(28, 24) +# define SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT 21 +# define SO_STREAM_2_VERTEX_READ_OFFSET_MASK INTEL_MASK(21, 21) +# define SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT 16 +# define SO_STREAM_2_VERTEX_READ_LENGTH_MASK INTEL_MASK(20, 16) +# define SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT 13 +# define SO_STREAM_1_VERTEX_READ_OFFSET_MASK INTEL_MASK(13, 13) +# define SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT 8 +# define SO_STREAM_1_VERTEX_READ_LENGTH_MASK INTEL_MASK(12, 8) +# define SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT 5 +# define SO_STREAM_0_VERTEX_READ_OFFSET_MASK INTEL_MASK(5, 5) +# define SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT 0 +# define SO_STREAM_0_VERTEX_READ_LENGTH_MASK INTEL_MASK(4, 0) + +/* 3DSTATE_WM for Gen7 */ +/* DW1 */ +# define 
GEN7_WM_STATISTICS_ENABLE (1 << 31) +# define GEN7_WM_DEPTH_CLEAR (1 << 30) +# define GEN7_WM_DISPATCH_ENABLE (1 << 29) +# define GEN7_WM_DEPTH_RESOLVE (1 << 28) +# define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) +# define GEN7_WM_KILL_ENABLE (1 << 25) +# define GEN7_WM_PSCDEPTH_OFF (0 << 23) +# define GEN7_WM_PSCDEPTH_ON (1 << 23) +# define GEN7_WM_PSCDEPTH_ON_GE (2 << 23) +# define GEN7_WM_PSCDEPTH_ON_LE (3 << 23) +# define GEN7_WM_USES_SOURCE_DEPTH (1 << 20) +# define GEN7_WM_USES_SOURCE_W (1 << 19) +# define GEN7_WM_POSITION_ZW_PIXEL (0 << 17) +# define GEN7_WM_POSITION_ZW_CENTROID (2 << 17) +# define GEN7_WM_POSITION_ZW_SAMPLE (3 << 17) +# define GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 11 +# define GEN7_WM_USES_INPUT_COVERAGE_MASK (1 << 10) +# define GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8) +# define GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8) +# define GEN7_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8) +# define GEN7_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8) +# define GEN7_WM_LINE_AA_WIDTH_0_5 (0 << 6) +# define GEN7_WM_LINE_AA_WIDTH_1_0 (1 << 6) +# define GEN7_WM_LINE_AA_WIDTH_2_0 (2 << 6) +# define GEN7_WM_LINE_AA_WIDTH_4_0 (3 << 6) +# define GEN7_WM_POLYGON_STIPPLE_ENABLE (1 << 4) +# define GEN7_WM_LINE_STIPPLE_ENABLE (1 << 3) +# define GEN7_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2) +# define GEN7_WM_MSRAST_OFF_PIXEL (0 << 0) +# define GEN7_WM_MSRAST_OFF_PATTERN (1 << 0) +# define GEN7_WM_MSRAST_ON_PIXEL (2 << 0) +# define GEN7_WM_MSRAST_ON_PATTERN (3 << 0) +/* DW2 */ +# define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31) + +#define _3DSTATE_PS 0x7820 /* GEN7+ */ +/* DW1: kernel pointer */ +/* DW2 */ +# define GEN7_PS_SPF_MODE (1 << 31) +# define GEN7_PS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN7_PS_SAMPLER_COUNT_SHIFT 27 +# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW3: scratch space */ +/* DW4 */ +# define GEN7_PS_MAX_THREADS_SHIFT 24 +# define 
GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11) +# define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10) +# define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9) +# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7) +# define GEN7_PS_POSOFFSET_NONE (0 << 3) +# define GEN7_PS_POSOFFSET_CENTROID (2 << 3) +# define GEN7_PS_POSOFFSET_SAMPLE (3 << 3) +# define GEN7_PS_32_DISPATCH_ENABLE (1 << 2) +# define GEN7_PS_16_DISPATCH_ENABLE (1 << 1) +# define GEN7_PS_8_DISPATCH_ENABLE (1 << 0) +/* DW5 */ +# define GEN7_PS_DISPATCH_START_GRF_SHIFT_0 16 +# define GEN7_PS_DISPATCH_START_GRF_SHIFT_1 8 +# define GEN7_PS_DISPATCH_START_GRF_SHIFT_2 0 +/* DW6: kernel 1 pointer */ +/* DW7: kernel 2 pointer */ + +#define _3DSTATE_SAMPLE_MASK 0x7818 /* GEN6+ */ + +#define _3DSTATE_DRAWING_RECTANGLE 0x7900 +#define _3DSTATE_BLEND_CONSTANT_COLOR 0x7901 +#define _3DSTATE_CHROMA_KEY 0x7904 +#define _3DSTATE_DEPTH_BUFFER 0x7905 /* GEN4-6 */ +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x7906 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x7907 +#define _3DSTATE_LINE_STIPPLE_PATTERN 0x7908 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 +#define _3DSTATE_AA_LINE_PARAMETERS 0x790a /* G45+ */ + +#define _3DSTATE_GS_SVB_INDEX 0x790b /* CTG+ */ +/* DW1 */ +# define SVB_INDEX_SHIFT 29 +# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */ +/* DW2: SVB index */ +/* DW3: SVB maximum index */ + +#define _3DSTATE_MULTISAMPLE 0x790d /* GEN6+ */ +/* DW1 */ +# define MS_PIXEL_LOCATION_CENTER (0 << 4) +# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define MS_NUMSAMPLES_1 (0 << 1) +# define MS_NUMSAMPLES_4 (2 << 1) +# define MS_NUMSAMPLES_8 (3 << 1) + +#define _3DSTATE_STENCIL_BUFFER 0x790e /* ILK, SNB */ +#define _3DSTATE_HIER_DEPTH_BUFFER 0x790f /* ILK, SNB */ + +#define GEN7_3DSTATE_CLEAR_PARAMS 0x7804 +#define GEN7_3DSTATE_DEPTH_BUFFER 0x7805 +#define GEN7_3DSTATE_STENCIL_BUFFER 0x7806 +#define GEN7_3DSTATE_HIER_DEPTH_BUFFER 0x7807 + +#define _3DSTATE_CLEAR_PARAMS 0x7910 /* ILK, SNB */ +# define DEPTH_CLEAR_VALID (1 << 15) +/* DW1: 
depth clear value */ + +#define _3DSTATE_SO_DECL_LIST 0x7917 /* GEN7+ */ +/* DW1 */ +# define SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT 12 +# define SO_STREAM_TO_BUFFER_SELECTS_3_MASK INTEL_MASK(15, 12) +# define SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT 8 +# define SO_STREAM_TO_BUFFER_SELECTS_2_MASK INTEL_MASK(11, 8) +# define SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT 4 +# define SO_STREAM_TO_BUFFER_SELECTS_1_MASK INTEL_MASK(7, 4) +# define SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT 0 +# define SO_STREAM_TO_BUFFER_SELECTS_0_MASK INTEL_MASK(3, 0) +/* DW2 */ +# define SO_NUM_ENTRIES_3_SHIFT 24 +# define SO_NUM_ENTRIES_3_MASK INTEL_MASK(31, 24) +# define SO_NUM_ENTRIES_2_SHIFT 16 +# define SO_NUM_ENTRIES_2_MASK INTEL_MASK(23, 16) +# define SO_NUM_ENTRIES_1_SHIFT 8 +# define SO_NUM_ENTRIES_1_MASK INTEL_MASK(15, 8) +# define SO_NUM_ENTRIES_0_SHIFT 0 +# define SO_NUM_ENTRIES_0_MASK INTEL_MASK(7, 0) + +/* SO_DECL DW0 */ +# define SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT 12 +# define SO_DECL_OUTPUT_BUFFER_SLOT_MASK INTEL_MASK(13, 12) +# define SO_DECL_HOLE_FLAG (1 << 11) +# define SO_DECL_REGISTER_INDEX_SHIFT 4 +# define SO_DECL_REGISTER_INDEX_MASK INTEL_MASK(9, 4) +# define SO_DECL_COMPONENT_MASK_SHIFT 0 +# define SO_DECL_COMPONENT_MASK_MASK INTEL_MASK(3, 0) + +#define _3DSTATE_SO_BUFFER 0x7918 /* GEN7+ */ +/* DW1 */ +# define SO_BUFFER_INDEX_SHIFT 29 +# define SO_BUFFER_INDEX_MASK INTEL_MASK(30, 29) +# define SO_BUFFER_PITCH_SHIFT 0 +# define SO_BUFFER_PITCH_MASK INTEL_MASK(11, 0) +/* DW2: start address */ +/* DW3: end address. 
*/ + +#define CMD_PIPE_CONTROL 0x7a00 + +#define CMD_MI_FLUSH 0x0200 + + +/* Bitfields for the URB_WRITE message, DW2 of message header: */ +#define URB_WRITE_PRIM_END 0x1 +#define URB_WRITE_PRIM_START 0x2 +#define URB_WRITE_PRIM_TYPE_SHIFT 2 + + +/* Maximum number of entries that can be addressed using a binding table + * pointer of type SURFTYPE_BUFFER + */ +#define BRW_MAX_NUM_BUFFER_ENTRIES (1 << 27) + +#include "brw_chipset.h" + +#endif diff --git a/backend/src/gen/brw_disasm.c b/backend/src/gen/brw_disasm.c index eecafe2..95fc997 100644 --- a/backend/src/gen/brw_disasm.c +++ b/backend/src/gen/brw_disasm.c @@ -25,9 +25,11 @@ #include //#include "main/mtypes.h" - //#include "brw_context.h" #include "brw_defines.h" +#include "brw_structs.h" + +#include struct { char *name; @@ -633,7 +635,7 @@ static int src_da1 (FILE *file, uint32_t type, uint32_t _reg_file, static int src_ia1 (FILE *file, uint32_t type, uint32_t _reg_file, - GLint _addr_imm, + int _addr_imm, uint32_t _addr_subreg_nr, uint32_t _negate, uint32_t __abs, diff --git a/backend/src/gen/brw_eu.c b/backend/src/gen/brw_eu.c index 006d5e5..50031fc 100644 --- a/backend/src/gen/brw_eu.c +++ b/backend/src/gen/brw_eu.c @@ -23,11 +23,15 @@ */ -#include "brw_context.h" +// #include "brw_context.h" #include "brw_defines.h" #include "brw_eu.h" -#include "glsl/ralloc.h" +#include +#include +#include + +// #include "glsl/ralloc.h" /* Returns the corresponding conditional mod for swapping src0 and * src1 in e.g. CMP. @@ -56,7 +60,7 @@ brw_swap_cmod(uint32_t cmod) /* How does predicate control work when execution_size != 8? Do I * need to test/set for 0xffff when execution_size is 16? 
*/ -void brw_set_predicate_control_flag_value( struct brw_compile *p, uint32_t value ) +void brw_set_predicate_control_flag_value(struct brw_compile *p, uint32_t value) { p->current->header.predicate_control = BRW_PREDICATE_NONE; @@ -72,7 +76,7 @@ void brw_set_predicate_control_flag_value( struct brw_compile *p, uint32_t value } } -void brw_set_predicate_control( struct brw_compile *p, uint32_t pc ) +void brw_set_predicate_control(struct brw_compile *p, uint32_t pc) { p->current->header.predicate_control = pc; } @@ -82,12 +86,12 @@ void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse) p->current->header.predicate_inverse = predicate_inverse; } -void brw_set_conditionalmod( struct brw_compile *p, uint32_t conditional ) +void brw_set_conditionalmod(struct brw_compile *p, uint32_t conditional) { p->current->header.destreg__conditionalmod = conditional; } -void brw_set_access_mode( struct brw_compile *p, uint32_t access_mode ) +void brw_set_access_mode(struct brw_compile *p, uint32_t access_mode) { p->current->header.access_mode = access_mode; } @@ -98,7 +102,7 @@ brw_set_compression_control(struct brw_compile *p, { p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED); - if (p->brw->intel.gen >= 6) { + if (p->gen >= 6) { /* Since we don't use the 32-wide support in gen6, we translate * the pre-gen6 compression control here. 
*/ @@ -129,23 +133,25 @@ brw_set_compression_control(struct brw_compile *p, } } -void brw_set_mask_control( struct brw_compile *p, uint32_t value ) +void brw_set_mask_control(struct brw_compile *p, uint32_t value) { p->current->header.mask_control = value; } -void brw_set_saturate( struct brw_compile *p, uint32_t value ) +void brw_set_saturate(struct brw_compile *p, uint32_t value) { p->current->header.saturate = value; } +#if 0 void brw_set_acc_write_control(struct brw_compile *p, uint32_t value) { if (p->brw->intel.gen >= 6) p->current->header.acc_wr_control = value; } +#endif -void brw_push_insn_state( struct brw_compile *p ) +void brw_push_insn_state(struct brw_compile *p) { assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); @@ -153,7 +159,7 @@ void brw_push_insn_state( struct brw_compile *p ) p->current++; } -void brw_pop_insn_state( struct brw_compile *p ) +void brw_pop_insn_state(struct brw_compile *p) { assert(p->current != p->stack); p->current--; @@ -161,6 +167,7 @@ void brw_pop_insn_state( struct brw_compile *p ) } +#if 0 /*********************************************************************** */ void @@ -200,8 +207,8 @@ brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) } -const uint32_t *brw_get_program( struct brw_compile *p, - uint32_t *sz ) +const uint32_t *brw_get_program(struct brw_compile *p, + uint32_t *sz) { uint32_t i; @@ -335,3 +342,4 @@ brw_resolve_cals(struct brw_compile *c) c->first_label = NULL; } } +#endif diff --git a/backend/src/gen/brw_eu.h b/backend/src/gen/brw_eu.h index e3d8a1b..1553ce9 100644 --- a/backend/src/gen/brw_eu.h +++ b/backend/src/gen/brw_eu.h @@ -21,15 +21,17 @@ * Authors: * Keith Whitwell */ - - #ifndef BRW_EU_H #define BRW_EU_H #include +#include #include "brw_structs.h" #include "brw_defines.h" -#include "program/prog_instruction.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ #define 
BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) #define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) @@ -42,17 +44,32 @@ #define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3) #define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) +#define WRITEMASK_X 0x1 +#define WRITEMASK_Y 0x2 +#define WRITEMASK_XY 0x3 +#define WRITEMASK_Z 0x4 +#define WRITEMASK_XZ 0x5 +#define WRITEMASK_YZ 0x6 +#define WRITEMASK_XYZ 0x7 +#define WRITEMASK_W 0x8 +#define WRITEMASK_XW 0x9 +#define WRITEMASK_YW 0xa +#define WRITEMASK_XYW 0xb +#define WRITEMASK_ZW 0xc +#define WRITEMASK_XZW 0xd +#define WRITEMASK_YZW 0xe +#define WRITEMASK_XYZW 0xf + static inline bool brw_is_single_value_swizzle(int swiz) { return (swiz == BRW_SWIZZLE_XXXX || - swiz == BRW_SWIZZLE_YYYY || - swiz == BRW_SWIZZLE_ZZZZ || - swiz == BRW_SWIZZLE_WWWW); + swiz == BRW_SWIZZLE_YYYY || + swiz == BRW_SWIZZLE_ZZZZ || + swiz == BRW_SWIZZLE_WWWW); } #define REG_SIZE (8*4) - /* These aren't hardware structs, just something useful for us to pass around: * * Align1 operation has a lot of control over input ranges. Used in @@ -61,109 +78,70 @@ static inline bool brw_is_single_value_swizzle(int swiz) */ struct brw_reg { - GLuint type:4; - GLuint file:2; - GLuint nr:8; - GLuint subnr:5; /* :1 in align16 */ - GLuint negate:1; /* source only */ - GLuint abs:1; /* source only */ - GLuint vstride:4; /* source only */ - GLuint width:3; /* src only, align1 only */ - GLuint hstride:2; /* align1 only */ - GLuint address_mode:1; /* relative addressing, hopefully! */ - GLuint pad0:1; - - union { + uint32_t type:4; + uint32_t file:2; + uint32_t nr:8; + uint32_t subnr:5; /* :1 in align16 */ + uint32_t negate:1; /* source only */ + uint32_t abs:1; /* source only */ + uint32_t vstride:4; /* source only */ + uint32_t width:3; /* src only, align1 only */ + uint32_t hstride:2; /* align1 only */ + uint32_t address_mode:1; /* relative addressing, hopefully! 
*/ + uint32_t pad0:1; + + union { struct { - GLuint swizzle:8; /* src only, align16 only */ - GLuint writemask:4; /* dest only, align16 only */ - GLint indirect_offset:10; /* relative addressing offset */ - GLuint pad1:10; /* two dwords total */ + uint32_t swizzle:8; /* src only, align16 only */ + uint32_t writemask:4; /* dest only, align16 only */ + int indirect_offset:10; /* relative addressing offset */ + uint32_t pad1:10; /* two dwords total */ } bits; - GLfloat f; - GLint d; - GLuint ud; - } dw1; + float f; + int d; + uint32_t ud; + } dw1; }; struct brw_indirect { - GLuint addr_subnr:4; - GLint addr_offset:10; - GLuint pad:18; + uint32_t addr_subnr:4; + int addr_offset:10; + uint32_t pad:18; }; - -struct brw_glsl_label; -struct brw_glsl_call; - - - #define BRW_EU_MAX_INSN_STACK 5 - +#define BRW_MAX_INSTRUCTION_NUM 8192 struct brw_compile { - struct brw_instruction *store; - int store_size; - GLuint nr_insn; - - void *mem_ctx; - - /* Allow clients to push/pop instruction state: - */ - struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; - bool compressed_stack[BRW_EU_MAX_INSN_STACK]; - struct brw_instruction *current; - - GLuint flag_value; - bool single_program_flow; - bool compressed; - struct brw_context *brw; - - /* Control flow stacks: - * - if_stack contains IF and ELSE instructions which must be patched - * (and popped) once the matching ENDIF instruction is encountered. - * - * Just store the instruction pointer(an index). - */ - int *if_stack; - int if_stack_depth; - int if_stack_array_size; - - /** - * loop_stack contains the instruction pointers of the starts of loops which - * must be patched (and popped) once the matching WHILE instruction is - * encountered. - */ - int *loop_stack; - /** - * pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF - * blocks they were popping out of, to fix up the mask stack. This tracks - * the IF/ENDIF nesting in each current nested loop level. 
- */ - int *if_depth_in_loop; - int loop_stack_depth; - int loop_stack_array_size; - - struct brw_glsl_label *first_label; /**< linked list of labels */ - struct brw_glsl_call *first_call; /**< linked list of CALs */ + int gen; + struct brw_instruction store[8192]; + int store_size; + uint32_t nr_insn; + + /* Allow clients to push/pop instruction state */ + struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + bool compressed_stack[BRW_EU_MAX_INSN_STACK]; + struct brw_instruction *current; + + uint32_t flag_value; + bool single_program_flow; + bool compressed; + struct brw_context *brw; }; - void -brw_save_label(struct brw_compile *c, const char *name, GLuint position); +brw_save_label(struct brw_compile *c, const char *name, uint32_t position); void -brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos); +brw_save_call(struct brw_compile *c, const char *name, uint32_t call_pos); void brw_resolve_cals(struct brw_compile *c); - - -static INLINE int type_sz( GLuint type ) +static inline int type_sz(uint32_t type) { - switch( type ) { + switch(type) { case BRW_REGISTER_TYPE_UD: case BRW_REGISTER_TYPE_D: case BRW_REGISTER_TYPE_F: @@ -192,15 +170,15 @@ static INLINE int type_sz( GLuint type ) * \param swizzle one of BRW_SWIZZLE_x * \param writemask WRITEMASK_X/Y/Z/W bitfield */ -static INLINE struct brw_reg brw_reg( GLuint file, - GLuint nr, - GLuint subnr, - GLuint type, - GLuint vstride, - GLuint width, - GLuint hstride, - GLuint swizzle, - GLuint writemask ) +static inline struct brw_reg brw_reg(uint32_t file, + uint32_t nr, + uint32_t subnr, + uint32_t type, + uint32_t vstride, + uint32_t width, + uint32_t hstride, + uint32_t swizzle, + uint32_t writemask) { struct brw_reg reg; if (file == BRW_GENERAL_REGISTER_FILE) @@ -236,166 +214,159 @@ static INLINE struct brw_reg brw_reg( GLuint file, } /** Construct float[16] register */ -static INLINE struct brw_reg brw_vec16_reg( GLuint file, - GLuint nr, - GLuint subnr ) +static inline struct brw_reg 
brw_vec16_reg(uint32_t file, + uint32_t nr, + uint32_t subnr) { return brw_reg(file, - nr, - subnr, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_16, - BRW_WIDTH_16, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XYZW, - WRITEMASK_XYZW); + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_16, + BRW_WIDTH_16, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); } /** Construct float[8] register */ -static INLINE struct brw_reg brw_vec8_reg( GLuint file, - GLuint nr, - GLuint subnr ) +static inline struct brw_reg brw_vec8_reg(uint32_t file, + uint32_t nr, + uint32_t subnr) { return brw_reg(file, - nr, - subnr, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_8, - BRW_WIDTH_8, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XYZW, - WRITEMASK_XYZW); + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); } /** Construct float[4] register */ -static INLINE struct brw_reg brw_vec4_reg( GLuint file, - GLuint nr, - GLuint subnr ) +static inline struct brw_reg brw_vec4_reg(uint32_t file, + uint32_t nr, + uint32_t subnr) { return brw_reg(file, - nr, - subnr, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_4, - BRW_WIDTH_4, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XYZW, - WRITEMASK_XYZW); + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); } /** Construct float[2] register */ -static INLINE struct brw_reg brw_vec2_reg( GLuint file, - GLuint nr, - GLuint subnr ) +static inline struct brw_reg brw_vec2_reg(uint32_t file, + uint32_t nr, + uint32_t subnr) { return brw_reg(file, - nr, - subnr, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_2, - BRW_WIDTH_2, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XYXY, - WRITEMASK_XY); + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYXY, + WRITEMASK_XY); } /** Construct float[1] register */ -static 
INLINE struct brw_reg brw_vec1_reg( GLuint file, - GLuint nr, - GLuint subnr ) +static inline struct brw_reg brw_vec1_reg(uint32_t file, + uint32_t nr, + uint32_t subnr) { return brw_reg(file, - nr, - subnr, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_0, - BRW_WIDTH_1, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XXXX, - WRITEMASK_X); + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + WRITEMASK_X); } -static INLINE struct brw_reg retype( struct brw_reg reg, - GLuint type ) +static inline struct brw_reg retype(struct brw_reg reg, uint32_t type) { reg.type = type; return reg; } -static inline struct brw_reg -sechalf(struct brw_reg reg) +static inline struct brw_reg sechalf(struct brw_reg reg) { if (reg.vstride) reg.nr++; return reg; } -static INLINE struct brw_reg suboffset( struct brw_reg reg, - GLuint delta ) -{ +static inline struct brw_reg suboffset(struct brw_reg reg, uint32_t delta) +{ reg.subnr += delta * type_sz(reg.type); return reg; } - -static INLINE struct brw_reg offset( struct brw_reg reg, - GLuint delta ) +static inline struct brw_reg offset(struct brw_reg reg, uint32_t delta) { reg.nr += delta; return reg; } - -static INLINE struct brw_reg byte_offset( struct brw_reg reg, - GLuint bytes ) +static inline struct brw_reg byte_offset(struct brw_reg reg, uint32_t bytes) { - GLuint newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; + uint32_t newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; reg.nr = newoffset / REG_SIZE; reg.subnr = newoffset % REG_SIZE; return reg; } - + /** Construct unsigned word[16] register */ -static INLINE struct brw_reg brw_uw16_reg( GLuint file, - GLuint nr, - GLuint subnr ) +static inline struct brw_reg brw_uw16_reg(uint32_t file, + uint32_t nr, + uint32_t subnr) { return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); } /** Construct unsigned word[8] register */ -static INLINE struct brw_reg brw_uw8_reg( GLuint file, - GLuint 
nr, - GLuint subnr ) +static inline struct brw_reg brw_uw8_reg(uint32_t file, + uint32_t nr, + uint32_t subnr) { return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); } /** Construct unsigned word[1] register */ -static INLINE struct brw_reg brw_uw1_reg( GLuint file, - GLuint nr, - GLuint subnr ) +static inline struct brw_reg brw_uw1_reg(uint32_t file, + uint32_t nr, + uint32_t subnr) { return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); } -static INLINE struct brw_reg brw_imm_reg( GLuint type ) +static inline struct brw_reg brw_imm_reg(uint32_t type) { - return brw_reg( BRW_IMMEDIATE_VALUE, - 0, - 0, - type, - BRW_VERTICAL_STRIDE_0, - BRW_WIDTH_1, - BRW_HORIZONTAL_STRIDE_0, - 0, - 0); + return brw_reg(BRW_IMMEDIATE_VALUE, + 0, + 0, + type, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + 0, + 0); } /** Construct float immediate register */ -static INLINE struct brw_reg brw_imm_f( GLfloat f ) +static inline struct brw_reg brw_imm_f(float f) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); imm.dw1.f = f; @@ -403,7 +374,7 @@ static INLINE struct brw_reg brw_imm_f( GLfloat f ) } /** Construct integer immediate register */ -static INLINE struct brw_reg brw_imm_d( GLint d ) +static inline struct brw_reg brw_imm_d(int d) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); imm.dw1.d = d; @@ -411,7 +382,7 @@ static INLINE struct brw_reg brw_imm_d( GLint d ) } /** Construct uint immediate register */ -static INLINE struct brw_reg brw_imm_ud( GLuint ud ) +static inline struct brw_reg brw_imm_ud(uint32_t ud) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); imm.dw1.ud = ud; @@ -419,7 +390,7 @@ static INLINE struct brw_reg brw_imm_ud( GLuint ud ) } /** Construct ushort immediate register */ -static INLINE struct brw_reg brw_imm_uw( GLushort uw ) +static inline struct brw_reg brw_imm_uw(uint16_t uw) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); imm.dw1.ud = uw | (uw << 
16); @@ -427,7 +398,7 @@ static INLINE struct brw_reg brw_imm_uw( GLushort uw ) } /** Construct short immediate register */ -static INLINE struct brw_reg brw_imm_w( GLshort w ) +static inline struct brw_reg brw_imm_w(short w) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); imm.dw1.d = w | (w << 16); @@ -439,7 +410,7 @@ static INLINE struct brw_reg brw_imm_w( GLshort w ) */ /** Construct vector of eight signed half-byte values */ -static INLINE struct brw_reg brw_imm_v( GLuint v ) +static inline struct brw_reg brw_imm_v(uint32_t v) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); imm.vstride = BRW_VERTICAL_STRIDE_0; @@ -450,7 +421,7 @@ static INLINE struct brw_reg brw_imm_v( GLuint v ) } /** Construct vector of four 8-bit float values */ -static INLINE struct brw_reg brw_imm_vf( GLuint v ) +static inline struct brw_reg brw_imm_vf(uint32_t v) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); imm.vstride = BRW_VERTICAL_STRIDE_0; @@ -464,148 +435,144 @@ static INLINE struct brw_reg brw_imm_vf( GLuint v ) #define VF_ONE 0x30 #define VF_NEG (1<<7) -static INLINE struct brw_reg brw_imm_vf4( GLuint v0, - GLuint v1, - GLuint v2, - GLuint v3) +static inline struct brw_reg brw_imm_vf4(uint32_t v0, + uint32_t v1, + uint32_t v2, + uint32_t v3) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); imm.vstride = BRW_VERTICAL_STRIDE_0; imm.width = BRW_WIDTH_4; imm.hstride = BRW_HORIZONTAL_STRIDE_1; imm.dw1.ud = ((v0 << 0) | - (v1 << 8) | - (v2 << 16) | - (v3 << 24)); + (v1 << 8) | + (v2 << 16) | + (v3 << 24)); return imm; } -static INLINE struct brw_reg brw_address( struct brw_reg reg ) +static inline struct brw_reg brw_address(struct brw_reg reg) { return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); } /** Construct float[1] general-purpose register */ -static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr ) +static inline struct brw_reg brw_vec1_grf(uint32_t nr, uint32_t subnr) { return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } 
/** Construct float[2] general-purpose register */ -static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr ) +static inline struct brw_reg brw_vec2_grf(uint32_t nr, uint32_t subnr) { return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } /** Construct float[4] general-purpose register */ -static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr ) +static inline struct brw_reg brw_vec4_grf(uint32_t nr, uint32_t subnr) { return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } /** Construct float[8] general-purpose register */ -static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr ) +static inline struct brw_reg brw_vec8_grf(uint32_t nr, uint32_t subnr) { return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } -static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr ) +static inline struct brw_reg brw_uw8_grf(uint32_t nr, uint32_t subnr) { return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } -static INLINE struct brw_reg brw_uw16_grf( GLuint nr, GLuint subnr ) +static inline struct brw_reg brw_uw16_grf(uint32_t nr, uint32_t subnr) { return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } - /** Construct null register (usually used for setting condition codes) */ -static INLINE struct brw_reg brw_null_reg( void ) +static inline struct brw_reg brw_null_reg(void) { - return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_NULL, - 0); + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NULL, + 0); } -static INLINE struct brw_reg brw_address_reg( GLuint subnr ) +static inline struct brw_reg brw_address_reg(uint32_t subnr) { - return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_ADDRESS, - subnr); + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, + subnr); } /* If/else instructions break in align16 mode if writemask & swizzle * aren't xyzw. 
This goes against the convention for other scalar * regs: */ -static INLINE struct brw_reg brw_ip_reg( void ) +static inline struct brw_reg brw_ip_reg(void) { - return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_IP, - 0, - BRW_REGISTER_TYPE_UD, - BRW_VERTICAL_STRIDE_4, /* ? */ - BRW_WIDTH_1, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, /* NOTE! */ - WRITEMASK_XYZW); /* NOTE! */ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_IP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_4, /* ? */ + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, /* NOTE! */ + WRITEMASK_XYZW); /* NOTE! */ } -static INLINE struct brw_reg brw_acc_reg( void ) +static inline struct brw_reg brw_acc_reg(void) { - return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_ACCUMULATOR, - 0); + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ACCUMULATOR, + 0); } -static INLINE struct brw_reg brw_notification_1_reg(void) +static inline struct brw_reg brw_notification_1_reg(void) { return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_NOTIFICATION_COUNT, - 1, - BRW_REGISTER_TYPE_UD, - BRW_VERTICAL_STRIDE_0, - BRW_WIDTH_1, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XXXX, - WRITEMASK_X); + BRW_ARF_NOTIFICATION_COUNT, + 1, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + WRITEMASK_X); } -static INLINE struct brw_reg brw_flag_reg( void ) +static inline struct brw_reg brw_flag_reg(void) { return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_FLAG, - 0); + BRW_ARF_FLAG, + 0); } -static INLINE struct brw_reg brw_mask_reg( GLuint subnr ) +static inline struct brw_reg brw_mask_reg(uint32_t subnr) { return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_MASK, - subnr); + BRW_ARF_MASK, + subnr); } -static INLINE struct brw_reg brw_message_reg( GLuint nr ) +static inline struct brw_reg brw_message_reg(uint32_t nr) { assert((nr & ~(1 << 7)) < BRW_MAX_MRF); return 
brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, - nr, - 0); + nr, + 0); } - - - /* This is almost always called with a numeric constant argument, so * make things easy to evaluate at compile time: */ -static INLINE GLuint cvt( GLuint val ) +static inline uint32_t cvt(uint32_t val) { switch (val) { case 0: return 0; @@ -619,10 +586,10 @@ static INLINE GLuint cvt( GLuint val ) return 0; } -static INLINE struct brw_reg stride( struct brw_reg reg, - GLuint vstride, - GLuint width, - GLuint hstride ) +static inline struct brw_reg stride(struct brw_reg reg, + uint32_t vstride, + uint32_t width, + uint32_t hstride) { reg.vstride = cvt(vstride); reg.width = cvt(width) - 1; @@ -631,103 +598,98 @@ static INLINE struct brw_reg stride( struct brw_reg reg, } -static INLINE struct brw_reg vec16( struct brw_reg reg ) +static inline struct brw_reg vec16(struct brw_reg reg) { return stride(reg, 16,16,1); } -static INLINE struct brw_reg vec8( struct brw_reg reg ) +static inline struct brw_reg vec8(struct brw_reg reg) { return stride(reg, 8,8,1); } -static INLINE struct brw_reg vec4( struct brw_reg reg ) +static inline struct brw_reg vec4(struct brw_reg reg) { return stride(reg, 4,4,1); } -static INLINE struct brw_reg vec2( struct brw_reg reg ) +static inline struct brw_reg vec2(struct brw_reg reg) { return stride(reg, 2,2,1); } -static INLINE struct brw_reg vec1( struct brw_reg reg ) +static inline struct brw_reg vec1(struct brw_reg reg) { return stride(reg, 0,1,0); } - -static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt ) +static inline struct brw_reg get_element(struct brw_reg reg, uint32_t elt) { return vec1(suboffset(reg, elt)); } -static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt ) +static inline struct brw_reg get_element_ud(struct brw_reg reg, uint32_t elt) { return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); } -static INLINE struct brw_reg get_element_d( struct brw_reg reg, GLuint elt ) +static inline struct brw_reg 
get_element_d(struct brw_reg reg, uint32_t elt) { return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_D), elt)); } - -static INLINE struct brw_reg brw_swizzle( struct brw_reg reg, - GLuint x, - GLuint y, - GLuint z, - GLuint w) +static inline struct brw_reg brw_swizzle(struct brw_reg reg, + uint32_t x, + uint32_t y, + uint32_t z, + uint32_t w) { assert(reg.file != BRW_IMMEDIATE_VALUE); reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), - BRW_GET_SWZ(reg.dw1.bits.swizzle, y), - BRW_GET_SWZ(reg.dw1.bits.swizzle, z), - BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); + BRW_GET_SWZ(reg.dw1.bits.swizzle, y), + BRW_GET_SWZ(reg.dw1.bits.swizzle, z), + BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); return reg; } -static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg, - GLuint x ) +static inline struct brw_reg brw_swizzle1(struct brw_reg reg, + uint32_t x) { return brw_swizzle(reg, x, x, x, x); } -static INLINE struct brw_reg brw_writemask( struct brw_reg reg, - GLuint mask ) +static inline struct brw_reg brw_writemask(struct brw_reg reg, + uint32_t mask) { assert(reg.file != BRW_IMMEDIATE_VALUE); reg.dw1.bits.writemask &= mask; return reg; } -static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg, - GLuint mask ) +static inline struct brw_reg brw_set_writemask(struct brw_reg reg, uint32_t mask) { assert(reg.file != BRW_IMMEDIATE_VALUE); reg.dw1.bits.writemask = mask; return reg; } -static INLINE struct brw_reg negate( struct brw_reg reg ) +static inline struct brw_reg negate(struct brw_reg reg) { reg.negate ^= 1; return reg; } -static INLINE struct brw_reg brw_abs( struct brw_reg reg ) +static inline struct brw_reg brw_abs(struct brw_reg reg) { reg.abs = 1; reg.negate = 0; return reg; } -/*********************************************************************** - */ -static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr, - GLint offset ) +static inline struct brw_reg brw_vec4_indirect(uint32_t subnr, + int offset) { struct brw_reg reg = 
brw_vec4_grf(0, 0); reg.subnr = subnr; @@ -736,8 +698,7 @@ static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr, return reg; } -static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr, - GLint offset ) +static inline struct brw_reg brw_vec1_indirect(uint32_t subnr, int offset) { struct brw_reg reg = brw_vec1_grf(0, 0); reg.subnr = subnr; @@ -746,48 +707,48 @@ static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr, return reg; } -static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset) +static inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset) { return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); } -static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset) +static inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset) { return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); } -static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset) +static inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset) { return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); } -static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset) +static inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset) { return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); } -static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset) +static inline struct brw_reg deref_1d(struct brw_indirect ptr, int offset) { return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D); } -static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset) +static inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset) { return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); } -static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr) +static inline struct brw_reg get_addr_reg(struct brw_indirect ptr) { return brw_address_reg(ptr.addr_subnr); } -static INLINE struct brw_indirect 
brw_indirect_offset( struct brw_indirect ptr, GLint offset ) +static inline struct brw_indirect brw_indirect_offset(struct brw_indirect ptr, int offset) { ptr.addr_offset += offset; return ptr; } -static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset ) +static inline struct brw_indirect brw_indirect(uint32_t addr_subnr, int offset) { struct brw_indirect ptr; ptr.addr_subnr = addr_subnr; @@ -797,62 +758,62 @@ static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset } /** Do two brw_regs refer to the same register? */ -static INLINE bool +static inline bool brw_same_reg(struct brw_reg r1, struct brw_reg r2) { return r1.file == r2.file && r1.nr == r2.nr; } -static INLINE struct brw_instruction *current_insn( struct brw_compile *p) +static inline struct brw_instruction *current_insn(struct brw_compile *p) { return &p->store[p->nr_insn]; } -void brw_pop_insn_state( struct brw_compile *p ); -void brw_push_insn_state( struct brw_compile *p ); -void brw_set_mask_control( struct brw_compile *p, GLuint value ); -void brw_set_saturate( struct brw_compile *p, GLuint value ); -void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ); +void brw_pop_insn_state(struct brw_compile *p); +void brw_push_insn_state(struct brw_compile *p); +void brw_set_mask_control(struct brw_compile *p, uint32_t value); +void brw_set_saturate(struct brw_compile *p, uint32_t value); +void brw_set_access_mode(struct brw_compile *p, uint32_t access_mode); void brw_set_compression_control(struct brw_compile *p, enum brw_compression c); -void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value ); -void brw_set_predicate_control( struct brw_compile *p, GLuint pc ); +void brw_set_predicate_control_flag_value(struct brw_compile *p, uint32_t value); +void brw_set_predicate_control(struct brw_compile *p, uint32_t pc); void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse); -void brw_set_conditionalmod( 
struct brw_compile *p, GLuint conditional ); -void brw_set_acc_write_control(struct brw_compile *p, GLuint value); +void brw_set_conditionalmod(struct brw_compile *p, uint32_t conditional); +void brw_set_acc_write_control(struct brw_compile *p, uint32_t value); void brw_init_compile(struct brw_context *, struct brw_compile *p, - void *mem_ctx); -const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz ); + void *mem_ctx); +const uint32_t *brw_get_program(struct brw_compile *p, uint32_t *sz); -struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode); +struct brw_instruction *brw_next_insn(struct brw_compile *p, uint32_t opcode); void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, - struct brw_reg dest); + struct brw_reg dest); void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, - struct brw_reg reg); + struct brw_reg reg); void gen6_resolve_implied_move(struct brw_compile *p, - struct brw_reg *src, - GLuint msg_reg_nr); + struct brw_reg *src, + uint32_t msg_reg_nr); /* Helpers for regular instructions: */ -#define ALU1(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ - struct brw_reg dest, \ - struct brw_reg src0); - -#define ALU2(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ - struct brw_reg dest, \ - struct brw_reg src0, \ - struct brw_reg src1); - -#define ALU3(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ - struct brw_reg dest, \ - struct brw_reg src0, \ - struct brw_reg src1, \ - struct brw_reg src2); +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0); + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1); + +#define ALU3(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1, \ + struct 
brw_reg src2); #define ROUND(OP) \ void brw_##OP(struct brw_compile *p, struct brw_reg dest, struct brw_reg src0); @@ -893,176 +854,101 @@ ROUND(RNDE) #undef ROUND -/* Helpers for SEND instruction: - */ +/* Helpers for SEND instruction */ void brw_set_sampler_message(struct brw_compile *p, struct brw_instruction *insn, - GLuint binding_table_index, - GLuint sampler, - GLuint msg_type, - GLuint response_length, - GLuint msg_length, - GLuint header_present, - GLuint simd_mode, - GLuint return_format); + uint32_t binding_table_index, + uint32_t sampler, + uint32_t msg_type, + uint32_t response_length, + uint32_t msg_length, + uint32_t header_present, + uint32_t simd_mode, + uint32_t return_format); void brw_set_dp_read_message(struct brw_compile *p, - struct brw_instruction *insn, - GLuint binding_table_index, - GLuint msg_control, - GLuint msg_type, - GLuint target_cache, - GLuint msg_length, - GLuint response_length); + struct brw_instruction *insn, + uint32_t binding_table_index, + uint32_t msg_control, + uint32_t msg_type, + uint32_t target_cache, + uint32_t msg_length, + uint32_t response_length); void brw_set_dp_write_message(struct brw_compile *p, - struct brw_instruction *insn, - GLuint binding_table_index, - GLuint msg_control, - GLuint msg_type, - GLuint msg_length, - bool header_present, - GLuint last_render_target, - GLuint response_length, - GLuint end_of_thread, - GLuint send_commit_msg); - -void brw_urb_WRITE(struct brw_compile *p, - struct brw_reg dest, - GLuint msg_reg_nr, - struct brw_reg src0, - bool allocate, - bool used, - GLuint msg_length, - GLuint response_length, - bool eot, - bool writes_complete, - GLuint offset, - GLuint swizzle); - -void brw_ff_sync(struct brw_compile *p, - struct brw_reg dest, - GLuint msg_reg_nr, - struct brw_reg src0, - bool allocate, - GLuint response_length, - bool eot); - -void brw_svb_write(struct brw_compile *p, - struct brw_reg dest, - GLuint msg_reg_nr, - struct brw_reg src0, - GLuint binding_table_index, - bool 
send_commit_msg); - -void brw_fb_WRITE(struct brw_compile *p, - int dispatch_width, - GLuint msg_reg_nr, - struct brw_reg src0, - GLuint binding_table_index, - GLuint msg_length, - GLuint response_length, - bool eot, - bool header_present); + struct brw_instruction *insn, + uint32_t binding_table_index, + uint32_t msg_control, + uint32_t msg_type, + uint32_t msg_length, + bool header_present, + uint32_t last_render_target, + uint32_t response_length, + uint32_t end_of_thread, + uint32_t send_commit_msg); void brw_SAMPLE(struct brw_compile *p, - struct brw_reg dest, - GLuint msg_reg_nr, - struct brw_reg src0, - GLuint binding_table_index, - GLuint sampler, - GLuint writemask, - GLuint msg_type, - GLuint response_length, - GLuint msg_length, - GLuint header_present, - GLuint simd_mode, - GLuint return_format); - -void brw_math_16( struct brw_compile *p, - struct brw_reg dest, - GLuint function, - GLuint saturate, - GLuint msg_reg_nr, - struct brw_reg src, - GLuint precision ); - -void brw_math( struct brw_compile *p, - struct brw_reg dest, - GLuint function, - GLuint saturate, - GLuint msg_reg_nr, - struct brw_reg src, - GLuint data_type, - GLuint precision ); + struct brw_reg dest, + uint32_t msg_reg_nr, + struct brw_reg src0, + uint32_t binding_table_index, + uint32_t sampler, + uint32_t writemask, + uint32_t msg_type, + uint32_t response_length, + uint32_t msg_length, + uint32_t header_present, + uint32_t simd_mode, + uint32_t return_format); + +void brw_math_16(struct brw_compile *p, + struct brw_reg dest, + uint32_t function, + uint32_t saturate, + uint32_t msg_reg_nr, + struct brw_reg src, + uint32_t precision); + +void brw_math(struct brw_compile *p, + struct brw_reg dest, + uint32_t function, + uint32_t saturate, + uint32_t msg_reg_nr, + struct brw_reg src, + uint32_t data_type, + uint32_t precision); void brw_math2(struct brw_compile *p, - struct brw_reg dest, - GLuint function, - struct brw_reg src0, - struct brw_reg src1); + struct brw_reg dest, + uint32_t 
function, + struct brw_reg src0, + struct brw_reg src1); void brw_oword_block_read(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg mrf, - uint32_t offset, - uint32_t bind_table_index); + struct brw_reg dest, + struct brw_reg mrf, + uint32_t offset, + uint32_t bind_table_index); void brw_oword_block_read_scratch(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg mrf, - int num_regs, - GLuint offset); + struct brw_reg dest, + struct brw_reg mrf, + int num_regs, + uint32_t offset); void brw_oword_block_write_scratch(struct brw_compile *p, - struct brw_reg mrf, - int num_regs, - GLuint offset); + struct brw_reg mrf, + int num_regs, + uint32_t offset); void brw_dword_scattered_read(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg mrf, - uint32_t bind_table_index); - -void brw_dp_READ_4_vs( struct brw_compile *p, - struct brw_reg dest, - GLuint location, - GLuint bind_table_index ); - -void brw_dp_READ_4_vs_relative(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg addrReg, - GLuint offset, - GLuint bind_table_index); - -/* If/else/endif. Works by manipulating the execution flags on each - * channel. 
- */ -struct brw_instruction *brw_IF(struct brw_compile *p, - GLuint execute_size); -struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional, - struct brw_reg src0, struct brw_reg src1); - -void brw_ELSE(struct brw_compile *p); -void brw_ENDIF(struct brw_compile *p); - -/* DO/WHILE loops: - */ -struct brw_instruction *brw_DO(struct brw_compile *p, - GLuint execute_size); - -struct brw_instruction *brw_WHILE(struct brw_compile *p); - -struct brw_instruction *brw_BREAK(struct brw_compile *p); -struct brw_instruction *brw_CONT(struct brw_compile *p); -struct brw_instruction *gen6_CONT(struct brw_compile *p); -struct brw_instruction *gen6_HALT(struct brw_compile *p); + struct brw_reg dest, + struct brw_reg mrf, + uint32_t bind_table_index); /* Forward jumps: */ void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx); - - void brw_NOP(struct brw_compile *p); void brw_WAIT(struct brw_compile *p); @@ -1071,53 +957,48 @@ void brw_WAIT(struct brw_compile *p); * taken from src0: */ void brw_CMP(struct brw_compile *p, - struct brw_reg dest, - GLuint conditional, - struct brw_reg src0, - struct brw_reg src1); + struct brw_reg dest, + uint32_t conditional, + struct brw_reg src0, + struct brw_reg src1); -void brw_print_reg( struct brw_reg reg ); - - -/*********************************************************************** - * brw_eu_util.c: - */ +void brw_print_reg(struct brw_reg reg); void brw_copy_indirect_to_indirect(struct brw_compile *p, - struct brw_indirect dst_ptr, - struct brw_indirect src_ptr, - GLuint count); + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + uint32_t count); void brw_copy_from_indirect(struct brw_compile *p, - struct brw_reg dst, - struct brw_indirect ptr, - GLuint count); + struct brw_reg dst, + struct brw_indirect ptr, + uint32_t count); void brw_copy4(struct brw_compile *p, - struct brw_reg dst, - struct brw_reg src, - GLuint count); + struct brw_reg dst, + struct brw_reg src, + uint32_t count); void 
brw_copy8(struct brw_compile *p, - struct brw_reg dst, - struct brw_reg src, - GLuint count); + struct brw_reg dst, + struct brw_reg src, + uint32_t count); -void brw_math_invert( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg src); +void brw_math_invert(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src); void brw_set_src1(struct brw_compile *p, - struct brw_instruction *insn, - struct brw_reg reg); + struct brw_instruction *insn, + struct brw_reg reg); void brw_set_uip_jip(struct brw_compile *p); uint32_t brw_swap_cmod(uint32_t cmod); -/* brw_optimize.c */ -void brw_optimize(struct brw_compile *p); -void brw_remove_duplicate_mrf_moves(struct brw_compile *p); -void brw_remove_grf_to_mrf_moves(struct brw_compile *p); +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* BRW_EU_H */ -#endif diff --git a/backend/src/gen/brw_eu_emit.c b/backend/src/gen/brw_eu_emit.c index 210b058..fe0c703 100644 --- a/backend/src/gen/brw_eu_emit.c +++ b/backend/src/gen/brw_eu_emit.c @@ -21,26 +21,27 @@ * Authors: * Keith Whitwell */ - -#include "brw_context.h" +// #include "brw_context.h" #include "brw_defines.h" #include "brw_eu.h" -#include "glsl/ralloc.h" +#include + +#define Elements(x) (sizeof(x) / sizeof(*(x))) /*********************************************************************** * Internal helper for constructing instructions */ static void guess_execution_size(struct brw_compile *p, - struct brw_instruction *insn, - struct brw_reg reg) + struct brw_instruction *insn, + struct brw_reg reg) { if (reg.width == BRW_WIDTH_8 && p->compressed) insn->header.execution_size = BRW_EXECUTE_16; else - insn->header.execution_size = reg.width; /* note - definitions are compatible */ + insn->header.execution_size = reg.width; /* note - definitions are compatible */ } @@ -53,11 +54,10 @@ static void guess_execution_size(struct brw_compile *p, */ void gen6_resolve_implied_move(struct brw_compile *p, - struct brw_reg *src, - GLuint msg_reg_nr) + struct 
brw_reg *src, + uint32_t msg_reg_nr) { - struct intel_context *intel = &p->brw->intel; - if (intel->gen < 6) + if (p->gen < 6) return; if (src->file == BRW_MESSAGE_REGISTER_FILE) @@ -68,7 +68,7 @@ gen6_resolve_implied_move(struct brw_compile *p, brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), - retype(*src, BRW_REGISTER_TYPE_UD)); + retype(*src, BRW_REGISTER_TYPE_UD)); brw_pop_insn_state(p); } *src = brw_message_reg(msg_reg_nr); @@ -85,8 +85,7 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) * Since we're pretending to have 16 MRFs anyway, we may as well use the * registers required for messages with EOT. */ - struct intel_context *intel = &p->brw->intel; - if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) { + if (p->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) { reg->file = BRW_GENERAL_REGISTER_FILE; reg->nr += GEN7_MRF_HACK_START; } @@ -95,7 +94,7 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, - struct brw_reg dest) + struct brw_reg dest) { if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && dest.file != BRW_MESSAGE_REGISTER_FILE) @@ -111,16 +110,16 @@ brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, insn->bits1.da1.dest_reg_nr = dest.nr; if (insn->header.access_mode == BRW_ALIGN_1) { - insn->bits1.da1.dest_subreg_nr = dest.subnr; - if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) - dest.hstride = BRW_HORIZONTAL_STRIDE_1; - insn->bits1.da1.dest_horiz_stride = dest.hstride; + insn->bits1.da1.dest_subreg_nr = dest.subnr; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.da1.dest_horiz_stride = dest.hstride; } else { - insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; - insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; - /* even ignored in 
da16, still need to set as '01' */ - insn->bits1.da16.dest_horiz_stride = 1; + insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; + insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; + /* even ignored in da16, still need to set as '01' */ + insn->bits1.da16.dest_horiz_stride = 1; } } else { @@ -129,15 +128,15 @@ brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, /* These are different sizes in align1 vs align16: */ if (insn->header.access_mode == BRW_ALIGN_1) { - insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; - if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) - dest.hstride = BRW_HORIZONTAL_STRIDE_1; - insn->bits1.ia1.dest_horiz_stride = dest.hstride; + insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.ia1.dest_horiz_stride = dest.hstride; } else { - insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; - /* even ignored in da16, still need to set as '01' */ - insn->bits1.ia16.dest_horiz_stride = 1; + insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; + /* even ignored in da16, still need to set as '01' */ + insn->bits1.ia16.dest_horiz_stride = 1; } } @@ -164,8 +163,8 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg) * destination horiz stride has to be a word. 
*/ if (reg.type == BRW_REGISTER_TYPE_V) { - assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * - reg_type_size[insn->bits1.da1.dest_reg_type] == 2); + assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * + reg_type_size[insn->bits1.da1.dest_reg_type] == 2); } return; @@ -189,7 +188,7 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg) width = width_for_reg[reg.width]; assert(insn->header.execution_size >= 0 && - insn->header.execution_size < Elements(execsize_for_reg)); + insn->header.execution_size < Elements(execsize_for_reg)); execsize = execsize_for_reg[insn->header.execution_size]; /* Restrictions from 3.3.10: Register Region Restrictions. */ @@ -227,7 +226,7 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg) void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, - struct brw_reg reg) + struct brw_reg reg) { if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) assert(reg.nr < 128); @@ -253,60 +252,60 @@ brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, else { if (reg.address_mode == BRW_ADDRESS_DIRECT) { - if (insn->header.access_mode == BRW_ALIGN_1) { - insn->bits2.da1.src0_subreg_nr = reg.subnr; - insn->bits2.da1.src0_reg_nr = reg.nr; - } - else { - insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; - insn->bits2.da16.src0_reg_nr = reg.nr; - } + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.da1.src0_subreg_nr = reg.subnr; + insn->bits2.da1.src0_reg_nr = reg.nr; + } + else { + insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; + insn->bits2.da16.src0_reg_nr = reg.nr; + } } else { - insn->bits2.ia1.src0_subreg_nr = reg.subnr; - - if (insn->header.access_mode == BRW_ALIGN_1) { - insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; - } - else { - insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; - } + insn->bits2.ia1.src0_subreg_nr = reg.subnr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.ia1.src0_indirect_offset = 
reg.dw1.bits.indirect_offset; + } + else { + insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; + } } if (insn->header.access_mode == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - insn->header.execution_size == BRW_EXECUTE_1) { - insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; - insn->bits2.da1.src0_width = BRW_WIDTH_1; - insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; - } - else { - insn->bits2.da1.src0_horiz_stride = reg.hstride; - insn->bits2.da1.src0_width = reg.width; - insn->bits2.da1.src0_vert_stride = reg.vstride; - } + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits2.da1.src0_width = BRW_WIDTH_1; + insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits2.da1.src0_horiz_stride = reg.hstride; + insn->bits2.da1.src0_width = reg.width; + insn->bits2.da1.src0_vert_stride = reg.vstride; + } } else { - insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); - insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); - insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); - insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); - - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - if (reg.vstride == BRW_VERTICAL_STRIDE_8) - insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; - else - insn->bits2.da16.src0_vert_stride = reg.vstride; + insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * 
descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits2.da16.src0_vert_stride = reg.vstride; } } } void brw_set_src1(struct brw_compile *p, - struct brw_instruction *insn, - struct brw_reg reg) + struct brw_instruction *insn, + struct brw_reg reg) { assert(reg.file != BRW_MESSAGE_REGISTER_FILE); @@ -336,40 +335,40 @@ void brw_set_src1(struct brw_compile *p, /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ if (insn->header.access_mode == BRW_ALIGN_1) { - insn->bits3.da1.src1_subreg_nr = reg.subnr; - insn->bits3.da1.src1_reg_nr = reg.nr; + insn->bits3.da1.src1_subreg_nr = reg.subnr; + insn->bits3.da1.src1_reg_nr = reg.nr; } else { - insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; - insn->bits3.da16.src1_reg_nr = reg.nr; + insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; + insn->bits3.da16.src1_reg_nr = reg.nr; } if (insn->header.access_mode == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - insn->header.execution_size == BRW_EXECUTE_1) { - insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; - insn->bits3.da1.src1_width = BRW_WIDTH_1; - insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; - } - else { - insn->bits3.da1.src1_horiz_stride = reg.hstride; - insn->bits3.da1.src1_width = reg.width; - insn->bits3.da1.src1_vert_stride = reg.vstride; - } + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits3.da1.src1_width = BRW_WIDTH_1; + insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits3.da1.src1_horiz_stride = reg.hstride; + insn->bits3.da1.src1_width = reg.width; + insn->bits3.da1.src1_vert_stride = reg.vstride; + } } else { - insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); - insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); - 
insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); - insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); - - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - if (reg.vstride == BRW_VERTICAL_STRIDE_8) - insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; - else - insn->bits3.da16.src1_vert_stride = reg.vstride; + insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits3.da16.src1_vert_stride = reg.vstride; } } } @@ -384,30 +383,28 @@ void brw_set_src1(struct brw_compile *p, */ static void brw_set_message_descriptor(struct brw_compile *p, - struct brw_instruction *inst, - enum brw_message_target sfid, - unsigned msg_length, - unsigned response_length, - bool header_present, - bool end_of_thread) + struct brw_instruction *inst, + enum brw_message_target sfid, + unsigned msg_length, + unsigned response_length, + bool header_present, + bool end_of_thread) { - struct intel_context *intel = &p->brw->intel; - brw_set_src1(p, inst, brw_imm_d(0)); - if (intel->gen >= 5) { + if (p->gen >= 5) { inst->bits3.generic_gen5.header_present = header_present; inst->bits3.generic_gen5.response_length = response_length; inst->bits3.generic_gen5.msg_length = msg_length; inst->bits3.generic_gen5.end_of_thread = end_of_thread; - if (intel->gen >= 6) { - /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */ - inst->header.destreg__conditionalmod 
= sfid; + if (p->gen >= 6) { + /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */ + inst->header.destreg__conditionalmod = sfid; } else { - /* Set Extended Message Descriptor (ex_desc) */ - inst->bits2.send_gen5.sfid = sfid; - inst->bits2.send_gen5.end_of_thread = end_of_thread; + /* Set Extended Message Descriptor (ex_desc) */ + inst->bits2.send_gen5.sfid = sfid; + inst->bits2.send_gen5.end_of_thread = end_of_thread; } } else { inst->bits3.generic.response_length = response_length; @@ -417,16 +414,14 @@ brw_set_message_descriptor(struct brw_compile *p, } } -static void brw_set_math_message( struct brw_compile *p, - struct brw_instruction *insn, - GLuint function, - GLuint integer_type, - bool low_precision, - bool saturate, - GLuint dataType ) +static void brw_set_math_message(struct brw_compile *p, + struct brw_instruction *insn, + uint32_t function, + uint32_t integer_type, + bool low_precision, + bool saturate, + uint32_t dataType) { - struct brw_context *brw = p->brw; - struct intel_context *intel = &brw->intel; unsigned msg_length; unsigned response_length; @@ -455,8 +450,8 @@ static void brw_set_math_message( struct brw_compile *p, } brw_set_message_descriptor(p, insn, BRW_SFID_MATH, - msg_length, response_length, false, false); - if (intel->gen == 5) { + msg_length, response_length, false, false); + if (p->gen == 5) { insn->bits3.math_gen5.function = function; insn->bits3.math_gen5.int_type = integer_type; insn->bits3.math_gen5.precision = low_precision; @@ -472,236 +467,92 @@ static void brw_set_math_message( struct brw_compile *p, } } - -static void brw_set_ff_sync_message(struct brw_compile *p, - struct brw_instruction *insn, - bool allocate, - GLuint response_length, - bool end_of_thread) -{ - brw_set_message_descriptor(p, insn, BRW_SFID_URB, - 1, response_length, true, end_of_thread); - insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ - insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ - insn->bits3.urb_gen5.swizzle_control = 0; 
/* Not used by FF_SYNC */ - insn->bits3.urb_gen5.allocate = allocate; - insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ - insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ -} - -static void brw_set_urb_message( struct brw_compile *p, - struct brw_instruction *insn, - bool allocate, - bool used, - GLuint msg_length, - GLuint response_length, - bool end_of_thread, - bool complete, - GLuint offset, - GLuint swizzle_control ) -{ - struct brw_context *brw = p->brw; - struct intel_context *intel = &brw->intel; - - brw_set_message_descriptor(p, insn, BRW_SFID_URB, - msg_length, response_length, true, end_of_thread); - if (intel->gen == 7) { - insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */ - insn->bits3.urb_gen7.offset = offset; - assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); - insn->bits3.urb_gen7.swizzle_control = swizzle_control; - /* per_slot_offset = 0 makes it ignore offsets in message header */ - insn->bits3.urb_gen7.per_slot_offset = 0; - insn->bits3.urb_gen7.complete = complete; - } else if (intel->gen >= 5) { - insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ - insn->bits3.urb_gen5.offset = offset; - insn->bits3.urb_gen5.swizzle_control = swizzle_control; - insn->bits3.urb_gen5.allocate = allocate; - insn->bits3.urb_gen5.used = used; /* ? */ - insn->bits3.urb_gen5.complete = complete; - } else { - insn->bits3.urb.opcode = 0; /* ? */ - insn->bits3.urb.offset = offset; - insn->bits3.urb.swizzle_control = swizzle_control; - insn->bits3.urb.allocate = allocate; - insn->bits3.urb.used = used; /* ? 
*/ - insn->bits3.urb.complete = complete; - } -} - void brw_set_dp_write_message(struct brw_compile *p, - struct brw_instruction *insn, - GLuint binding_table_index, - GLuint msg_control, - GLuint msg_type, - GLuint msg_length, - bool header_present, - GLuint last_render_target, - GLuint response_length, - GLuint end_of_thread, - GLuint send_commit_msg) + struct brw_instruction *insn, + uint32_t binding_table_index, + uint32_t msg_control, + uint32_t msg_type, + uint32_t msg_length, + bool header_present, + uint32_t last_render_target, + uint32_t response_length, + uint32_t end_of_thread, + uint32_t send_commit_msg) { - struct brw_context *brw = p->brw; - struct intel_context *intel = &brw->intel; unsigned sfid; - if (intel->gen >= 7) { - /* Use the Render Cache for RT writes; otherwise use the Data Cache */ - if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) - sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; - else - sfid = GEN7_SFID_DATAPORT_DATA_CACHE; - } else if (intel->gen == 6) { - /* Use the render cache for all write messages. 
*/ - sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; - } else { - sfid = BRW_SFID_DATAPORT_WRITE; - } - + /* Use the Render Cache for RT writes; otherwise use the Data Cache */ + if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) + sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; + else + sfid = GEN7_SFID_DATAPORT_DATA_CACHE; brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, - header_present, end_of_thread); - - if (intel->gen >= 7) { - insn->bits3.gen7_dp.binding_table_index = binding_table_index; - insn->bits3.gen7_dp.msg_control = msg_control; - insn->bits3.gen7_dp.last_render_target = last_render_target; - insn->bits3.gen7_dp.msg_type = msg_type; - } else if (intel->gen == 6) { - insn->bits3.gen6_dp.binding_table_index = binding_table_index; - insn->bits3.gen6_dp.msg_control = msg_control; - insn->bits3.gen6_dp.last_render_target = last_render_target; - insn->bits3.gen6_dp.msg_type = msg_type; - insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; - } else if (intel->gen == 5) { - insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; - insn->bits3.dp_write_gen5.msg_control = msg_control; - insn->bits3.dp_write_gen5.last_render_target = last_render_target; - insn->bits3.dp_write_gen5.msg_type = msg_type; - insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; - } else { - insn->bits3.dp_write.binding_table_index = binding_table_index; - insn->bits3.dp_write.msg_control = msg_control; - insn->bits3.dp_write.last_render_target = last_render_target; - insn->bits3.dp_write.msg_type = msg_type; - insn->bits3.dp_write.send_commit_msg = send_commit_msg; - } + header_present, end_of_thread); + + insn->bits3.gen7_dp.binding_table_index = binding_table_index; + insn->bits3.gen7_dp.msg_control = msg_control; + insn->bits3.gen7_dp.last_render_target = last_render_target; + insn->bits3.gen7_dp.msg_type = msg_type; } void brw_set_dp_read_message(struct brw_compile *p, - struct brw_instruction *insn, - GLuint binding_table_index, - 
GLuint msg_control, - GLuint msg_type, - GLuint target_cache, - GLuint msg_length, - GLuint response_length) + struct brw_instruction *insn, + uint32_t binding_table_index, + uint32_t msg_control, + uint32_t msg_type, + uint32_t target_cache, + uint32_t msg_length, + uint32_t response_length) { - struct brw_context *brw = p->brw; - struct intel_context *intel = &brw->intel; unsigned sfid; - if (intel->gen >= 7) { - sfid = GEN7_SFID_DATAPORT_DATA_CACHE; - } else if (intel->gen == 6) { - if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) - sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; - else - sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE; - } else { - sfid = BRW_SFID_DATAPORT_READ; - } - + sfid = GEN7_SFID_DATAPORT_DATA_CACHE; brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, - true, false); - - if (intel->gen >= 7) { - insn->bits3.gen7_dp.binding_table_index = binding_table_index; - insn->bits3.gen7_dp.msg_control = msg_control; - insn->bits3.gen7_dp.last_render_target = 0; - insn->bits3.gen7_dp.msg_type = msg_type; - } else if (intel->gen == 6) { - insn->bits3.gen6_dp.binding_table_index = binding_table_index; - insn->bits3.gen6_dp.msg_control = msg_control; - insn->bits3.gen6_dp.last_render_target = 0; - insn->bits3.gen6_dp.msg_type = msg_type; - insn->bits3.gen6_dp.send_commit_msg = 0; - } else if (intel->gen == 5) { - insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; - insn->bits3.dp_read_gen5.msg_control = msg_control; - insn->bits3.dp_read_gen5.msg_type = msg_type; - insn->bits3.dp_read_gen5.target_cache = target_cache; - } else if (intel->is_g4x) { - insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ - insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ - insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ - insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ - } else { - insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ - insn->bits3.dp_read.msg_control = 
msg_control; /*8:11*/ - insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ - insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ - } + true, false); + + insn->bits3.gen7_dp.binding_table_index = binding_table_index; + insn->bits3.gen7_dp.msg_control = msg_control; + insn->bits3.gen7_dp.last_render_target = 0; + insn->bits3.gen7_dp.msg_type = msg_type; } void brw_set_sampler_message(struct brw_compile *p, struct brw_instruction *insn, - GLuint binding_table_index, - GLuint sampler, - GLuint msg_type, - GLuint response_length, - GLuint msg_length, - GLuint header_present, - GLuint simd_mode, - GLuint return_format) + uint32_t binding_table_index, + uint32_t sampler, + uint32_t msg_type, + uint32_t response_length, + uint32_t msg_length, + uint32_t header_present, + uint32_t simd_mode, + uint32_t return_format) { - struct brw_context *brw = p->brw; - struct intel_context *intel = &brw->intel; - brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length, - response_length, header_present, false); - - if (intel->gen >= 7) { - insn->bits3.sampler_gen7.binding_table_index = binding_table_index; - insn->bits3.sampler_gen7.sampler = sampler; - insn->bits3.sampler_gen7.msg_type = msg_type; - insn->bits3.sampler_gen7.simd_mode = simd_mode; - } else if (intel->gen >= 5) { - insn->bits3.sampler_gen5.binding_table_index = binding_table_index; - insn->bits3.sampler_gen5.sampler = sampler; - insn->bits3.sampler_gen5.msg_type = msg_type; - insn->bits3.sampler_gen5.simd_mode = simd_mode; - } else if (intel->is_g4x) { - insn->bits3.sampler_g4x.binding_table_index = binding_table_index; - insn->bits3.sampler_g4x.sampler = sampler; - insn->bits3.sampler_g4x.msg_type = msg_type; - } else { - insn->bits3.sampler.binding_table_index = binding_table_index; - insn->bits3.sampler.sampler = sampler; - insn->bits3.sampler.msg_type = msg_type; - insn->bits3.sampler.return_format = return_format; - } + response_length, header_present, false); + 
insn->bits3.sampler_gen7.binding_table_index = binding_table_index; + insn->bits3.sampler_gen7.sampler = sampler; + insn->bits3.sampler_gen7.msg_type = msg_type; + insn->bits3.sampler_gen7.simd_mode = simd_mode; } - #define next_insn brw_next_insn struct brw_instruction * -brw_next_insn(struct brw_compile *p, GLuint opcode) +brw_next_insn(struct brw_compile *p, uint32_t opcode) { struct brw_instruction *insn; - + assert(0); +#if 0 if (p->nr_insn + 1 > p->store_size) { - if (0) - printf("incresing the store size to %d\n", p->store_size << 1); p->store_size <<= 1; p->store = reralloc(p->mem_ctx, p->store, struct brw_instruction, p->store_size); if (!p->store) assert(!"realloc eu store memeory failed"); } +#endif insn = &p->store[p->nr_insn++]; memcpy(insn, p->current, sizeof(*insn)); @@ -718,10 +569,10 @@ brw_next_insn(struct brw_compile *p, GLuint opcode) return insn; } -static struct brw_instruction *brw_alu1( struct brw_compile *p, - GLuint opcode, - struct brw_reg dest, - struct brw_reg src ) +static struct brw_instruction *brw_alu1(struct brw_compile *p, + uint32_t opcode, + struct brw_reg dest, + struct brw_reg src) { struct brw_instruction *insn = next_insn(p, opcode); brw_set_dest(p, insn, dest); @@ -730,10 +581,10 @@ static struct brw_instruction *brw_alu1( struct brw_compile *p, } static struct brw_instruction *brw_alu2(struct brw_compile *p, - GLuint opcode, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1 ) + uint32_t opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) { struct brw_instruction *insn = next_insn(p, opcode); brw_set_dest(p, insn, dest); @@ -754,11 +605,11 @@ get_3src_subreg_nr(struct brw_reg reg) } static struct brw_instruction *brw_alu3(struct brw_compile *p, - GLuint opcode, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1, - struct brw_reg src2) + uint32_t opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1, + struct brw_reg src2) { struct 
brw_instruction *insn = next_insn(p, opcode); @@ -767,7 +618,7 @@ static struct brw_instruction *brw_alu3(struct brw_compile *p, assert(insn->header.access_mode == BRW_ALIGN_16); assert(dest.file == BRW_GENERAL_REGISTER_FILE || - dest.file == BRW_MESSAGE_REGISTER_FILE); + dest.file == BRW_MESSAGE_REGISTER_FILE); assert(dest.nr < 128); assert(dest.address_mode == BRW_ADDRESS_DIRECT); assert(dest.type = BRW_REGISTER_TYPE_F); @@ -818,31 +669,32 @@ static struct brw_instruction *brw_alu3(struct brw_compile *p, /*********************************************************************** * Convenience routines. */ -#define ALU1(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ - struct brw_reg dest, \ - struct brw_reg src0) \ -{ \ - return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ -} - -#define ALU2(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ - struct brw_reg dest, \ - struct brw_reg src0, \ - struct brw_reg src1) \ -{ \ - return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ -} - -#define ALU3(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ - struct brw_reg dest, \ - struct brw_reg src0, \ - struct brw_reg src1, \ - struct brw_reg src2) \ -{ \ - return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \ +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0) \ +{ \ + return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ +} + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1) \ +{ \ + return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ +} + +#define ALU3(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1, \ + struct brw_reg src2) \ +{ \ + return brw_alu3(p, BRW_OPCODE_##OP, dest, \ + src0, src1, src2); \ } /* Rounding operations (other than RNDD) require two 
instructions - the first @@ -852,25 +704,17 @@ struct brw_instruction *brw_##OP(struct brw_compile *p, \ * * Sandybridge and later appear to round correctly without an ADD. */ -#define ROUND(OP) \ -void brw_##OP(struct brw_compile *p, \ - struct brw_reg dest, \ - struct brw_reg src) \ -{ \ - struct brw_instruction *rnd, *add; \ - rnd = next_insn(p, BRW_OPCODE_##OP); \ - brw_set_dest(p, rnd, dest); \ - brw_set_src0(p, rnd, src); \ - \ - if (p->brw->intel.gen < 6) { \ - /* turn on round-increments */ \ - rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \ - add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ - add->header.predicate_control = BRW_PREDICATE_NORMAL; \ - } \ +#define ROUND(OP) \ +void brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src) \ +{ \ + struct brw_instruction *rnd; \ + rnd = next_insn(p, BRW_OPCODE_##OP); \ + brw_set_dest(p, rnd, dest); \ + brw_set_src0(p, rnd, src); \ } - ALU1(MOV) ALU2(SEL) ALU1(NOT) @@ -898,23 +742,22 @@ ALU3(MAD) ROUND(RNDZ) ROUND(RNDE) - struct brw_instruction *brw_ADD(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1) + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) { /* 6.2.2: add */ if (src0.type == BRW_REGISTER_TYPE_F || (src0.file == BRW_IMMEDIATE_VALUE && - src0.type == BRW_REGISTER_TYPE_VF)) { + src0.type == BRW_REGISTER_TYPE_VF)) { assert(src1.type != BRW_REGISTER_TYPE_UD); assert(src1.type != BRW_REGISTER_TYPE_D); } if (src1.type == BRW_REGISTER_TYPE_F || (src1.file == BRW_IMMEDIATE_VALUE && - src1.type == BRW_REGISTER_TYPE_VF)) { + src1.type == BRW_REGISTER_TYPE_VF)) { assert(src0.type != BRW_REGISTER_TYPE_UD); assert(src0.type != BRW_REGISTER_TYPE_D); } @@ -923,9 +766,9 @@ struct brw_instruction *brw_ADD(struct brw_compile *p, } struct brw_instruction *brw_MUL(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1) + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) { 
/* 6.32.38: mul */ if (src0.type == BRW_REGISTER_TYPE_D || @@ -937,22 +780,22 @@ struct brw_instruction *brw_MUL(struct brw_compile *p, if (src0.type == BRW_REGISTER_TYPE_F || (src0.file == BRW_IMMEDIATE_VALUE && - src0.type == BRW_REGISTER_TYPE_VF)) { + src0.type == BRW_REGISTER_TYPE_VF)) { assert(src1.type != BRW_REGISTER_TYPE_UD); assert(src1.type != BRW_REGISTER_TYPE_D); } if (src1.type == BRW_REGISTER_TYPE_F || (src1.file == BRW_IMMEDIATE_VALUE && - src1.type == BRW_REGISTER_TYPE_VF)) { + src1.type == BRW_REGISTER_TYPE_VF)) { assert(src0.type != BRW_REGISTER_TYPE_UD); assert(src0.type != BRW_REGISTER_TYPE_D); } assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || - src0.nr != BRW_ARF_ACCUMULATOR); + src0.nr != BRW_ARF_ACCUMULATOR); assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || - src1.nr != BRW_ARF_ACCUMULATOR); + src1.nr != BRW_ARF_ACCUMULATOR); return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); } @@ -966,10 +809,6 @@ void brw_NOP(struct brw_compile *p) brw_set_src1(p, insn, brw_imm_ud(0x0)); } - - - - /*********************************************************************** * Comparisons, if/else/endif */ @@ -990,611 +829,15 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, return insn; } -static void -push_if_stack(struct brw_compile *p, struct brw_instruction *inst) -{ - p->if_stack[p->if_stack_depth] = inst - p->store; - - p->if_stack_depth++; - if (p->if_stack_array_size <= p->if_stack_depth) { - p->if_stack_array_size *= 2; - p->if_stack = reralloc(p->mem_ctx, p->if_stack, int, - p->if_stack_array_size); - } -} - -static struct brw_instruction * -pop_if_stack(struct brw_compile *p) -{ - p->if_stack_depth--; - return &p->store[p->if_stack[p->if_stack_depth]]; -} - -static void -push_loop_stack(struct brw_compile *p, struct brw_instruction *inst) -{ - if (p->loop_stack_array_size < p->loop_stack_depth) { - p->loop_stack_array_size *= 2; - p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int, - p->loop_stack_array_size); - 
p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int, - p->loop_stack_array_size); - } - - p->loop_stack[p->loop_stack_depth] = inst - p->store; - p->loop_stack_depth++; - p->if_depth_in_loop[p->loop_stack_depth] = 0; -} - -static struct brw_instruction * -get_inner_do_insn(struct brw_compile *p) -{ - return &p->store[p->loop_stack[p->loop_stack_depth - 1]]; -} - -/* EU takes the value from the flag register and pushes it onto some - * sort of a stack (presumably merging with any flag value already on - * the stack). Within an if block, the flags at the top of the stack - * control execution on each channel of the unit, eg. on each of the - * 16 pixel values in our wm programs. - * - * When the matching 'else' instruction is reached (presumably by - * countdown of the instruction count patched in by our ELSE/ENDIF - * functions), the relevent flags are inverted. - * - * When the matching 'endif' instruction is reached, the flags are - * popped off. If the stack is now empty, normal execution resumes. 
- */ -struct brw_instruction * -brw_IF(struct brw_compile *p, GLuint execute_size) -{ - struct intel_context *intel = &p->brw->intel; - struct brw_instruction *insn; - - insn = next_insn(p, BRW_OPCODE_IF); - - /* Override the defaults for this instruction: - */ - if (intel->gen < 6) { - brw_set_dest(p, insn, brw_ip_reg()); - brw_set_src0(p, insn, brw_ip_reg()); - brw_set_src1(p, insn, brw_imm_d(0x0)); - } else if (intel->gen == 6) { - brw_set_dest(p, insn, brw_imm_w(0)); - insn->bits1.branch_gen6.jump_count = 0; - brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); - brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); - } else { - brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); - brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); - brw_set_src1(p, insn, brw_imm_ud(0)); - insn->bits3.break_cont.jip = 0; - insn->bits3.break_cont.uip = 0; - } - - insn->header.execution_size = execute_size; - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.predicate_control = BRW_PREDICATE_NORMAL; - insn->header.mask_control = BRW_MASK_ENABLE; - if (!p->single_program_flow) - insn->header.thread_control = BRW_THREAD_SWITCH; - - p->current->header.predicate_control = BRW_PREDICATE_NONE; - - push_if_stack(p, insn); - p->if_depth_in_loop[p->loop_stack_depth]++; - return insn; -} - -/* This function is only used for gen6-style IF instructions with an - * embedded comparison (conditional modifier). It is not used on gen7. 
- */ -struct brw_instruction * -gen6_IF(struct brw_compile *p, uint32_t conditional, - struct brw_reg src0, struct brw_reg src1) -{ - struct brw_instruction *insn; - - insn = next_insn(p, BRW_OPCODE_IF); - - brw_set_dest(p, insn, brw_imm_w(0)); - if (p->compressed) { - insn->header.execution_size = BRW_EXECUTE_16; - } else { - insn->header.execution_size = BRW_EXECUTE_8; - } - insn->bits1.branch_gen6.jump_count = 0; - brw_set_src0(p, insn, src0); - brw_set_src1(p, insn, src1); - - assert(insn->header.compression_control == BRW_COMPRESSION_NONE); - assert(insn->header.predicate_control == BRW_PREDICATE_NONE); - insn->header.destreg__conditionalmod = conditional; - - if (!p->single_program_flow) - insn->header.thread_control = BRW_THREAD_SWITCH; - - push_if_stack(p, insn); - return insn; -} - -/** - * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs. - */ -static void -convert_IF_ELSE_to_ADD(struct brw_compile *p, - struct brw_instruction *if_inst, - struct brw_instruction *else_inst) -{ - /* The next instruction (where the ENDIF would be, if it existed) */ - struct brw_instruction *next_inst = &p->store[p->nr_insn]; - - assert(p->single_program_flow); - assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); - assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); - assert(if_inst->header.execution_size == BRW_EXECUTE_1); - - /* Convert IF to an ADD instruction that moves the instruction pointer - * to the first instruction of the ELSE block. If there is no ELSE - * block, point to where ENDIF would be. Reverse the predicate. - * - * There's no need to execute an ENDIF since we don't need to do any - * stack operations, and if we're currently executing, we just want to - * continue normally. - */ - if_inst->header.opcode = BRW_OPCODE_ADD; - if_inst->header.predicate_inverse = 1; - - if (else_inst != NULL) { - /* Convert ELSE to an ADD instruction that points where the ENDIF - * would be. 
- */ - else_inst->header.opcode = BRW_OPCODE_ADD; - - if_inst->bits3.ud = (else_inst - if_inst + 1) * 16; - else_inst->bits3.ud = (next_inst - else_inst) * 16; - } else { - if_inst->bits3.ud = (next_inst - if_inst) * 16; - } -} - -/** - * Patch IF and ELSE instructions with appropriate jump targets. - */ -static void -patch_IF_ELSE(struct brw_compile *p, - struct brw_instruction *if_inst, - struct brw_instruction *else_inst, - struct brw_instruction *endif_inst) -{ - struct intel_context *intel = &p->brw->intel; - - /* We shouldn't be patching IF and ELSE instructions in single program flow - * mode when gen < 6, because in single program flow mode on those - * platforms, we convert flow control instructions to conditional ADDs that - * operate on IP (see brw_ENDIF). - * - * However, on Gen6, writing to IP doesn't work in single program flow mode - * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may - * not be updated by non-flow control instructions."). And on later - * platforms, there is no significant benefit to converting control flow - * instructions to conditional ADDs. So we do patch IF and ELSE - * instructions in single program flow mode on those platforms. - */ - if (intel->gen < 6) - assert(!p->single_program_flow); - - assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); - assert(endif_inst != NULL); - assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); - - unsigned br = 1; - /* Jump count is for 64bit data chunk each, so one 128bit instruction - * requires 2 chunks. - */ - if (intel->gen >= 5) - br = 2; - - assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF); - endif_inst->header.execution_size = if_inst->header.execution_size; - - if (else_inst == NULL) { - /* Patch IF -> ENDIF */ - if (intel->gen < 6) { - /* Turn it into an IFF, which means no mask stack operations for - * all-false and jumping past the ENDIF. 
- */ - if_inst->header.opcode = BRW_OPCODE_IFF; - if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1); - if_inst->bits3.if_else.pop_count = 0; - if_inst->bits3.if_else.pad0 = 0; - } else if (intel->gen == 6) { - /* As of gen6, there is no IFF and IF must point to the ENDIF. */ - if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst); - } else { - if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); - if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst); - } - } else { - else_inst->header.execution_size = if_inst->header.execution_size; - - /* Patch IF -> ELSE */ - if (intel->gen < 6) { - if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst); - if_inst->bits3.if_else.pop_count = 0; - if_inst->bits3.if_else.pad0 = 0; - } else if (intel->gen == 6) { - if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1); - } - - /* Patch ELSE -> ENDIF */ - if (intel->gen < 6) { - /* BRW_OPCODE_ELSE pre-gen6 should point just past the - * matching ENDIF. - */ - else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1); - else_inst->bits3.if_else.pop_count = 1; - else_inst->bits3.if_else.pad0 = 0; - } else if (intel->gen == 6) { - /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. 
*/ - else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst); - } else { - /* The IF instruction's JIP should point just past the ELSE */ - if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1); - /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */ - if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); - else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst); - } - } -} - -void -brw_ELSE(struct brw_compile *p) -{ - struct intel_context *intel = &p->brw->intel; - struct brw_instruction *insn; - - insn = next_insn(p, BRW_OPCODE_ELSE); - - if (intel->gen < 6) { - brw_set_dest(p, insn, brw_ip_reg()); - brw_set_src0(p, insn, brw_ip_reg()); - brw_set_src1(p, insn, brw_imm_d(0x0)); - } else if (intel->gen == 6) { - brw_set_dest(p, insn, brw_imm_w(0)); - insn->bits1.branch_gen6.jump_count = 0; - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - } else { - brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, brw_imm_ud(0)); - insn->bits3.break_cont.jip = 0; - insn->bits3.break_cont.uip = 0; - } - - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.mask_control = BRW_MASK_ENABLE; - if (!p->single_program_flow) - insn->header.thread_control = BRW_THREAD_SWITCH; - - push_if_stack(p, insn); -} - -void -brw_ENDIF(struct brw_compile *p) -{ - struct intel_context *intel = &p->brw->intel; - struct brw_instruction *insn = NULL; - struct brw_instruction *else_inst = NULL; - struct brw_instruction *if_inst = NULL; - struct brw_instruction *tmp; - bool emit_endif = true; - - /* In single program flow mode, we can express IF and ELSE instructions - * equivalently as ADD instructions that operate on IP. 
On platforms prior - * to Gen6, flow control instructions cause an implied thread switch, so - * this is a significant savings. - * - * However, on Gen6, writing to IP doesn't work in single program flow mode - * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may - * not be updated by non-flow control instructions."). And on later - * platforms, there is no significant benefit to converting control flow - * instructions to conditional ADDs. So we only do this trick on Gen4 and - * Gen5. - */ - if (intel->gen < 6 && p->single_program_flow) - emit_endif = false; - - /* - * A single next_insn() may change the base adress of instruction store - * memory(p->store), so call it first before referencing the instruction - * store pointer from an index - */ - if (emit_endif) - insn = next_insn(p, BRW_OPCODE_ENDIF); - - /* Pop the IF and (optional) ELSE instructions from the stack */ - p->if_depth_in_loop[p->loop_stack_depth]--; - tmp = pop_if_stack(p); - if (tmp->header.opcode == BRW_OPCODE_ELSE) { - else_inst = tmp; - tmp = pop_if_stack(p); - } - if_inst = tmp; - - if (!emit_endif) { - /* ENDIF is useless; don't bother emitting it. 
*/ - convert_IF_ELSE_to_ADD(p, if_inst, else_inst); - return; - } - - if (intel->gen < 6) { - brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(p, insn, brw_imm_d(0x0)); - } else if (intel->gen == 6) { - brw_set_dest(p, insn, brw_imm_w(0)); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - } else { - brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, brw_imm_ud(0)); - } - - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.mask_control = BRW_MASK_ENABLE; - insn->header.thread_control = BRW_THREAD_SWITCH; - - /* Also pop item off the stack in the endif instruction: */ - if (intel->gen < 6) { - insn->bits3.if_else.jump_count = 0; - insn->bits3.if_else.pop_count = 1; - insn->bits3.if_else.pad0 = 0; - } else if (intel->gen == 6) { - insn->bits1.branch_gen6.jump_count = 2; - } else { - insn->bits3.break_cont.jip = 2; - } - patch_IF_ELSE(p, if_inst, else_inst, insn); -} - -struct brw_instruction *brw_BREAK(struct brw_compile *p) -{ - struct intel_context *intel = &p->brw->intel; - struct brw_instruction *insn; - - insn = next_insn(p, BRW_OPCODE_BREAK); - if (intel->gen >= 6) { - brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, brw_imm_d(0x0)); - } else { - brw_set_dest(p, insn, brw_ip_reg()); - brw_set_src0(p, insn, brw_ip_reg()); - brw_set_src1(p, insn, brw_imm_d(0x0)); - insn->bits3.if_else.pad0 = 0; - insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth]; - } - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = BRW_EXECUTE_8; - - return insn; -} - -struct brw_instruction 
*gen6_CONT(struct brw_compile *p) -{ - struct brw_instruction *insn; - - insn = next_insn(p, BRW_OPCODE_CONTINUE); - brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_dest(p, insn, brw_ip_reg()); - brw_set_src0(p, insn, brw_ip_reg()); - brw_set_src1(p, insn, brw_imm_d(0x0)); - - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = BRW_EXECUTE_8; - return insn; -} - -struct brw_instruction *brw_CONT(struct brw_compile *p) -{ - struct brw_instruction *insn; - insn = next_insn(p, BRW_OPCODE_CONTINUE); - brw_set_dest(p, insn, brw_ip_reg()); - brw_set_src0(p, insn, brw_ip_reg()); - brw_set_src1(p, insn, brw_imm_d(0x0)); - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = BRW_EXECUTE_8; - /* insn->header.mask_control = BRW_MASK_DISABLE; */ - insn->bits3.if_else.pad0 = 0; - insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth]; - return insn; -} - -struct brw_instruction *gen6_HALT(struct brw_compile *p) -{ - struct brw_instruction *insn; - - insn = next_insn(p, BRW_OPCODE_HALT); - brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */ - - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = BRW_EXECUTE_8; - return insn; -} - -/* DO/WHILE loop: - * - * The DO/WHILE is just an unterminated loop -- break or continue are - * used for control within the loop. We have a few ways they can be - * done. - * - * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, - * jip and no DO instruction. - * - * For non-uniform control flow pre-gen6, there's a DO instruction to - * push the mask, and a WHILE to jump back, and BREAK to get out and - * pop the mask. 
- * - * For gen6, there's no more mask stack, so no need for DO. WHILE - * just points back to the first instruction of the loop. - */ -struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) -{ - struct intel_context *intel = &p->brw->intel; - - if (intel->gen >= 6 || p->single_program_flow) { - push_loop_stack(p, &p->store[p->nr_insn]); - return &p->store[p->nr_insn]; - } else { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); - - push_loop_stack(p, insn); - - /* Override the defaults for this instruction: - */ - brw_set_dest(p, insn, brw_null_reg()); - brw_set_src0(p, insn, brw_null_reg()); - brw_set_src1(p, insn, brw_null_reg()); - - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = execute_size; - insn->header.predicate_control = BRW_PREDICATE_NONE; - /* insn->header.mask_control = BRW_MASK_ENABLE; */ - /* insn->header.mask_control = BRW_MASK_DISABLE; */ - - return insn; - } -} - -/** - * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE - * instruction here. - * - * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop - * nesting, since it can always just point to the end of the block/current loop. - */ -static void -brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst) -{ - struct intel_context *intel = &p->brw->intel; - struct brw_instruction *do_inst = get_inner_do_insn(p); - struct brw_instruction *inst; - int br = (intel->gen == 5) ? 2 : 1; - - for (inst = while_inst - 1; inst != do_inst; inst--) { - /* If the jump count is != 0, that means that this instruction has already - * been patched because it's part of a loop inside of the one we're - * patching. 
- */ - if (inst->header.opcode == BRW_OPCODE_BREAK && - inst->bits3.if_else.jump_count == 0) { - inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1); - } else if (inst->header.opcode == BRW_OPCODE_CONTINUE && - inst->bits3.if_else.jump_count == 0) { - inst->bits3.if_else.jump_count = br * (while_inst - inst); - } - } -} - -struct brw_instruction *brw_WHILE(struct brw_compile *p) -{ - struct intel_context *intel = &p->brw->intel; - struct brw_instruction *insn, *do_insn; - GLuint br = 1; - - if (intel->gen >= 5) - br = 2; - - if (intel->gen >= 7) { - insn = next_insn(p, BRW_OPCODE_WHILE); - do_insn = get_inner_do_insn(p); - - brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, brw_imm_ud(0)); - insn->bits3.break_cont.jip = br * (do_insn - insn); - - insn->header.execution_size = BRW_EXECUTE_8; - } else if (intel->gen == 6) { - insn = next_insn(p, BRW_OPCODE_WHILE); - do_insn = get_inner_do_insn(p); - - brw_set_dest(p, insn, brw_imm_w(0)); - insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - - insn->header.execution_size = BRW_EXECUTE_8; - } else { - if (p->single_program_flow) { - insn = next_insn(p, BRW_OPCODE_ADD); - do_insn = get_inner_do_insn(p); - - brw_set_dest(p, insn, brw_ip_reg()); - brw_set_src0(p, insn, brw_ip_reg()); - brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16)); - insn->header.execution_size = BRW_EXECUTE_1; - } else { - insn = next_insn(p, BRW_OPCODE_WHILE); - do_insn = get_inner_do_insn(p); - - assert(do_insn->header.opcode == BRW_OPCODE_DO); - - brw_set_dest(p, insn, brw_ip_reg()); - brw_set_src0(p, insn, brw_ip_reg()); - brw_set_src1(p, insn, brw_imm_d(0)); - - insn->header.execution_size = do_insn->header.execution_size; - insn->bits3.if_else.jump_count = br * (do_insn 
- insn + 1); - insn->bits3.if_else.pop_count = 0; - insn->bits3.if_else.pad0 = 0; - - brw_patch_break_cont(p, insn); - } - } - insn->header.compression_control = BRW_COMPRESSION_NONE; - p->current->header.predicate_control = BRW_PREDICATE_NONE; - - p->loop_stack_depth--; - - return insn; -} - - -/* FORWARD JUMPS: - */ -void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx) -{ - struct intel_context *intel = &p->brw->intel; - struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx]; - GLuint jmpi = 1; - - if (intel->gen >= 5) - jmpi = 2; - - assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); - assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); - - jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1); -} - - - /* To integrate with the above, it makes sense that the comparison * instruction should populate the flag register. It might be simpler * just to use the flag reg for most WM tasks? */ void brw_CMP(struct brw_compile *p, - struct brw_reg dest, - GLuint conditional, - struct brw_reg src0, - struct brw_reg src1) + struct brw_reg dest, + uint32_t conditional, + struct brw_reg src0, + struct brw_reg src1) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); @@ -1637,41 +880,38 @@ void brw_WAIT (struct brw_compile *p) * Helpers for the various SEND message types: */ -/** Extended math function, float[8]. - */ -void brw_math( struct brw_compile *p, - struct brw_reg dest, - GLuint function, - GLuint saturate, - GLuint msg_reg_nr, - struct brw_reg src, - GLuint data_type, - GLuint precision ) +/** Extended math function, float[8]. 
*/ +void brw_math(struct brw_compile *p, + struct brw_reg dest, + uint32_t function, + uint32_t saturate, + uint32_t msg_reg_nr, + struct brw_reg src, + uint32_t data_type, + uint32_t precision) { - struct intel_context *intel = &p->brw->intel; - - if (intel->gen >= 6) { + if (p->gen >= 6) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); assert(dest.file == BRW_GENERAL_REGISTER_FILE); assert(src.file == BRW_GENERAL_REGISTER_FILE); assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); - if (intel->gen == 6) - assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + if (p->gen == 6) + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); /* Source modifiers are ignored for extended math instructions on Gen6. */ - if (intel->gen == 6) { - assert(!src.negate); - assert(!src.abs); + if (p->gen == 6) { + assert(!src.negate); + assert(!src.abs); } if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || - function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || - function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { - assert(src.type != BRW_REGISTER_TYPE_F); + function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || + function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { + assert(src.type != BRW_REGISTER_TYPE_F); } else { - assert(src.type == BRW_REGISTER_TYPE_F); + assert(src.type == BRW_REGISTER_TYPE_F); } /* Math is the same ISA format as other opcodes, except that CondModifier @@ -1695,28 +935,26 @@ void brw_math( struct brw_compile *p, brw_set_dest(p, insn, dest); brw_set_src0(p, insn, src); brw_set_math_message(p, - insn, - function, - src.type == BRW_REGISTER_TYPE_D, - precision, - saturate, - data_type); + insn, + function, + src.type == BRW_REGISTER_TYPE_D, + precision, + saturate, + data_type); } } /** Extended math function, float[8]. 
*/ void brw_math2(struct brw_compile *p, - struct brw_reg dest, - GLuint function, - struct brw_reg src0, - struct brw_reg src1) + struct brw_reg dest, + uint32_t function, + struct brw_reg src0, + struct brw_reg src1) { - struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); - assert(intel->gen >= 6); - (void) intel; + assert(p->gen >= 6); assert(dest.file == BRW_GENERAL_REGISTER_FILE); @@ -1724,7 +962,7 @@ void brw_math2(struct brw_compile *p, assert(src1.file == BRW_GENERAL_REGISTER_FILE); assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); - if (intel->gen == 6) { + if (p->gen == 6) { assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); } @@ -1740,7 +978,7 @@ void brw_math2(struct brw_compile *p, } /* Source modifiers are ignored for extended math instructions on Gen6. */ - if (intel->gen == 6) { + if (p->gen == 6) { assert(!src0.negate); assert(!src0.abs); assert(!src1.negate); @@ -1761,18 +999,17 @@ void brw_math2(struct brw_compile *p, * Extended math function, float[16]. * Use 2 send instructions. 
*/ -void brw_math_16( struct brw_compile *p, - struct brw_reg dest, - GLuint function, - GLuint saturate, - GLuint msg_reg_nr, - struct brw_reg src, - GLuint precision ) +void brw_math_16(struct brw_compile *p, + struct brw_reg dest, + uint32_t function, + uint32_t saturate, + uint32_t msg_reg_nr, + struct brw_reg src, + uint32_t precision) { - struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; - if (intel->gen >= 6) { + if (p->gen >= 6) { insn = next_insn(p, BRW_OPCODE_MATH); /* Math is the same ISA format as other opcodes, except that CondModifier @@ -1803,12 +1040,12 @@ void brw_math_16( struct brw_compile *p, brw_set_dest(p, insn, dest); brw_set_src0(p, insn, src); brw_set_math_message(p, - insn, - function, - BRW_MATH_INTEGER_UNSIGNED, - precision, - saturate, - BRW_MATH_DATA_VECTOR); + insn, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); /* Second instruction: */ @@ -1819,12 +1056,12 @@ void brw_math_16( struct brw_compile *p, brw_set_dest(p, insn, offset(dest,1)); brw_set_src0(p, insn, src); brw_set_math_message(p, - insn, - function, - BRW_MATH_INTEGER_UNSIGNED, - precision, - saturate, - BRW_MATH_DATA_VECTOR); + insn, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); brw_pop_insn_state(p); } @@ -1838,15 +1075,14 @@ void brw_math_16( struct brw_compile *p, * register spilling. 
*/ void brw_oword_block_write_scratch(struct brw_compile *p, - struct brw_reg mrf, - int num_regs, - GLuint offset) + struct brw_reg mrf, + int num_regs, + uint32_t offset) { - struct intel_context *intel = &p->brw->intel; uint32_t msg_control, msg_type; int mlen; - if (intel->gen >= 6) + if (p->gen >= 6) offset /= 16; mrf = retype(mrf, BRW_REGISTER_TYPE_UD); @@ -1873,10 +1109,10 @@ void brw_oword_block_write_scratch(struct brw_compile *p, /* set message header global offset field (reg 0, element 2) */ brw_MOV(p, - retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, - mrf.nr, - 2), BRW_REGISTER_TYPE_UD), - brw_imm_ud(offset)); + retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + mrf.nr, + 2), BRW_REGISTER_TYPE_UD), + brw_imm_ud(offset)); brw_pop_insn_state(p); } @@ -1886,11 +1122,11 @@ void brw_oword_block_write_scratch(struct brw_compile *p, struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); int send_commit_msg; struct brw_reg src_header = retype(brw_vec8_grf(0, 0), - BRW_REGISTER_TYPE_UW); + BRW_REGISTER_TYPE_UW); if (insn->header.compression_control != BRW_COMPRESSION_NONE) { - insn->header.compression_control = BRW_COMPRESSION_NONE; - src_header = vec16(src_header); + insn->header.compression_control = BRW_COMPRESSION_NONE; + src_header = vec16(src_header); } assert(insn->header.predicate_control == BRW_PREDICATE_NONE); insn->header.destreg__conditionalmod = mrf.nr; @@ -1905,37 +1141,37 @@ void brw_oword_block_write_scratch(struct brw_compile *p, * protection. Our use of DP writes is all about register * spilling within a thread. 
*/ - if (intel->gen >= 6) { - dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); - send_commit_msg = 0; + if (p->gen >= 6) { + dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); + send_commit_msg = 0; } else { - dest = src_header; - send_commit_msg = 1; + dest = src_header; + send_commit_msg = 1; } brw_set_dest(p, insn, dest); - if (intel->gen >= 6) { - brw_set_src0(p, insn, mrf); + if (p->gen >= 6) { + brw_set_src0(p, insn, mrf); } else { - brw_set_src0(p, insn, brw_null_reg()); + brw_set_src0(p, insn, brw_null_reg()); } - if (intel->gen >= 6) - msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; + if (p->gen >= 6) + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; else - msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; + msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; brw_set_dp_write_message(p, - insn, - 255, /* binding table index (255=stateless) */ - msg_control, - msg_type, - mlen, - true, /* header_present */ - 0, /* not a render target */ - send_commit_msg, /* response_length */ - 0, /* eot */ - send_commit_msg); + insn, + 255, /* binding table index (255=stateless) */ + msg_control, + msg_type, + mlen, + true, /* header_present */ + 0, /* not a render target */ + send_commit_msg, /* response_length */ + 0, /* eot */ + send_commit_msg); } } @@ -1949,16 +1185,15 @@ void brw_oword_block_write_scratch(struct brw_compile *p, */ void brw_oword_block_read_scratch(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg mrf, - int num_regs, - GLuint offset) + struct brw_reg dest, + struct brw_reg mrf, + int num_regs, + uint32_t offset) { - struct intel_context *intel = &p->brw->intel; uint32_t msg_control; int rlen; - if (intel->gen >= 6) + if (p->gen >= 6) offset /= 16; mrf = retype(mrf, BRW_REGISTER_TYPE_UD); @@ -1981,10 +1216,10 @@ brw_oword_block_read_scratch(struct brw_compile *p, /* set message header global offset field (reg 0, element 2) */ brw_MOV(p, - retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, - 
mrf.nr, - 2), BRW_REGISTER_TYPE_UD), - brw_imm_ud(offset)); + retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + mrf.nr, + 2), BRW_REGISTER_TYPE_UD), + brw_imm_ud(offset)); brw_pop_insn_state(p); } @@ -1996,21 +1231,21 @@ brw_oword_block_read_scratch(struct brw_compile *p, insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = mrf.nr; - brw_set_dest(p, insn, dest); /* UW? */ - if (intel->gen >= 6) { - brw_set_src0(p, insn, mrf); + brw_set_dest(p, insn, dest); /* UW? */ + if (p->gen >= 6) { + brw_set_src0(p, insn, mrf); } else { - brw_set_src0(p, insn, brw_null_reg()); + brw_set_src0(p, insn, brw_null_reg()); } brw_set_dp_read_message(p, - insn, - 255, /* binding table index (255=stateless) */ - msg_control, - BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ - BRW_DATAPORT_READ_TARGET_RENDER_CACHE, - 1, /* msg_length */ - rlen); + insn, + 255, /* binding table index (255=stateless) */ + msg_control, + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + BRW_DATAPORT_READ_TARGET_RENDER_CACHE, + 1, /* msg_length */ + rlen); } } @@ -2020,15 +1255,14 @@ brw_oword_block_read_scratch(struct brw_compile *p, * Used for fetching shader constants. */ void brw_oword_block_read(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg mrf, - uint32_t offset, - uint32_t bind_table_index) + struct brw_reg dest, + struct brw_reg mrf, + uint32_t offset, + uint32_t bind_table_index) { - struct intel_context *intel = &p->brw->intel; /* On newer hardware, offset is in units of owords. 
*/ - if (intel->gen >= 6) + if (p->gen >= 6) offset /= 16; mrf = retype(mrf, BRW_REGISTER_TYPE_UD); @@ -2042,10 +1276,10 @@ void brw_oword_block_read(struct brw_compile *p, /* set message header global offset field (reg 0, element 2) */ brw_MOV(p, - retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, - mrf.nr, - 2), BRW_REGISTER_TYPE_UD), - brw_imm_ud(offset)); + retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + mrf.nr, + 2), BRW_REGISTER_TYPE_UD), + brw_imm_ud(offset)); struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); insn->header.destreg__conditionalmod = mrf.nr; @@ -2054,20 +1288,20 @@ void brw_oword_block_read(struct brw_compile *p, dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); brw_set_dest(p, insn, dest); - if (intel->gen >= 6) { + if (p->gen >= 6) { brw_set_src0(p, insn, mrf); } else { brw_set_src0(p, insn, brw_null_reg()); } brw_set_dp_read_message(p, - insn, - bind_table_index, - BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, - BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, - BRW_DATAPORT_READ_TARGET_DATA_CACHE, - 1, /* msg_length */ - 1); /* response_length (1 reg, 2 owords!) */ + insn, + bind_table_index, + BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 1, /* msg_length */ + 1); /* response_length (1 reg, 2 owords!) */ brw_pop_insn_state(p); } @@ -2079,9 +1313,9 @@ void brw_oword_block_read(struct brw_compile *p, * the provided mrf header reg. 
*/ void brw_dword_scattered_read(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg mrf, - uint32_t bind_table_index) + struct brw_reg dest, + struct brw_reg mrf, + uint32_t bind_table_index) { mrf = retype(mrf, BRW_REGISTER_TYPE_UD); @@ -2102,17 +1336,15 @@ void brw_dword_scattered_read(struct brw_compile *p, brw_set_src0(p, insn, brw_null_reg()); brw_set_dp_read_message(p, - insn, - bind_table_index, - BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS, - BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ, - BRW_DATAPORT_READ_TARGET_DATA_CACHE, - 2, /* msg_length */ - 1); /* response_length */ + insn, + bind_table_index, + BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS, + BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ, + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 2, /* msg_length */ + 1); /* response_length */ } - - /** * Read float[4] constant(s) from VS constant buffer. * For relative addressing, two float[4] constants will be read into 'dest'. @@ -2120,14 +1352,13 @@ void brw_dword_scattered_read(struct brw_compile *p, */ void brw_dp_READ_4_vs(struct brw_compile *p, struct brw_reg dest, - GLuint location, - GLuint bind_table_index) + uint32_t location, + uint32_t bind_table_index) { - struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; - GLuint msg_reg_nr = 1; + uint32_t msg_reg_nr = 1; - if (intel->gen >= 6) + if (p->gen >= 6) location /= 16; /* Setup MRF[1] with location/offset into const buffer */ @@ -2137,8 +1368,8 @@ void brw_dp_READ_4_vs(struct brw_compile *p, brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2), - BRW_REGISTER_TYPE_UD), - brw_imm_ud(location)); + BRW_REGISTER_TYPE_UD), + brw_imm_ud(location)); brw_pop_insn_state(p); insn = next_insn(p, BRW_OPCODE_SEND); @@ -2149,20 +1380,20 @@ void brw_dp_READ_4_vs(struct brw_compile *p, insn->header.mask_control = BRW_MASK_DISABLE; brw_set_dest(p, insn, dest); - if 
(intel->gen >= 6) { + if (p->gen >= 6) { brw_set_src0(p, insn, brw_message_reg(msg_reg_nr)); } else { brw_set_src0(p, insn, brw_null_reg()); } brw_set_dp_read_message(p, - insn, - bind_table_index, - 0, - BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ - BRW_DATAPORT_READ_TARGET_DATA_CACHE, - 1, /* msg_length */ - 1); /* response_length (1 Oword) */ + insn, + bind_table_index, + 0, + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 1, /* msg_length */ + 1); /* response_length (1 Oword) */ } /** @@ -2170,12 +1401,11 @@ void brw_dp_READ_4_vs(struct brw_compile *p, * relative addressing. */ void brw_dp_READ_4_vs_relative(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg addr_reg, - GLuint offset, - GLuint bind_table_index) + struct brw_reg dest, + struct brw_reg addr_reg, + uint32_t offset, + uint32_t bind_table_index) { - struct intel_context *intel = &p->brw->intel; struct brw_reg src = brw_vec8_grf(0, 0); int msg_type; @@ -2190,7 +1420,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, * fields ignored. 
*/ brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D), - addr_reg, brw_imm_d(offset)); + addr_reg, brw_imm_d(offset)); brw_pop_insn_state(p); gen6_resolve_implied_move(p, &src, 0); @@ -2204,38 +1434,30 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, brw_set_dest(p, insn, dest); brw_set_src0(p, insn, src); - if (intel->gen >= 6) - msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; - else if (intel->gen == 5 || intel->is_g4x) - msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; - else - msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; brw_set_dp_read_message(p, - insn, - bind_table_index, - BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, - msg_type, - BRW_DATAPORT_READ_TARGET_DATA_CACHE, - 2, /* msg_length */ - 1); /* response_length */ + insn, + bind_table_index, + BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, + msg_type, + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 2, /* msg_length */ + 1); /* response_length */ } - - void brw_fb_WRITE(struct brw_compile *p, - int dispatch_width, - GLuint msg_reg_nr, + int dispatch_width, + uint32_t msg_reg_nr, struct brw_reg src0, - GLuint binding_table_index, - GLuint msg_length, - GLuint response_length, + uint32_t binding_table_index, + uint32_t msg_length, + uint32_t response_length, bool eot, bool header_present) { - struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; - GLuint msg_control, msg_type; + uint32_t msg_control, msg_type; struct brw_reg dest; if (dispatch_width == 16) @@ -2243,7 +1465,7 @@ void brw_fb_WRITE(struct brw_compile *p, else dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); - if (intel->gen >= 6 && binding_table_index == 0) { + if (p->gen >= 6 && binding_table_index == 0) { insn = next_insn(p, BRW_OPCODE_SENDC); } else { insn = next_insn(p, BRW_OPCODE_SEND); @@ -2252,7 +1474,7 @@ void brw_fb_WRITE(struct brw_compile *p, insn->header.predicate_control = 0; 
insn->header.compression_control = BRW_COMPRESSION_NONE; - if (intel->gen >= 6) { + if (p->gen >= 6) { /* headerless version, just submit color payload */ src0 = brw_message_reg(msg_reg_nr); @@ -2271,16 +1493,16 @@ void brw_fb_WRITE(struct brw_compile *p, brw_set_dest(p, insn, dest); brw_set_src0(p, insn, src0); brw_set_dp_write_message(p, - insn, - binding_table_index, - msg_control, - msg_type, - msg_length, - header_present, - eot, /* last render target write */ - response_length, - eot, - 0 /* send_commit_msg */); + insn, + binding_table_index, + msg_control, + msg_type, + msg_length, + header_present, + eot, /* last render target write */ + response_length, + eot, + 0 /* send_commit_msg */); } @@ -2290,20 +1512,19 @@ void brw_fb_WRITE(struct brw_compile *p, * of sampling operation is performed. See volume 4, page 161 of docs. */ void brw_SAMPLE(struct brw_compile *p, - struct brw_reg dest, - GLuint msg_reg_nr, - struct brw_reg src0, - GLuint binding_table_index, - GLuint sampler, - GLuint writemask, - GLuint msg_type, - GLuint response_length, - GLuint msg_length, - GLuint header_present, - GLuint simd_mode, - GLuint return_format) + struct brw_reg dest, + uint32_t msg_reg_nr, + struct brw_reg src0, + uint32_t binding_table_index, + uint32_t sampler, + uint32_t writemask, + uint32_t msg_type, + uint32_t response_length, + uint32_t msg_length, + uint32_t header_present, + uint32_t simd_mode, + uint32_t return_format) { - struct intel_context *intel = &p->brw->intel; bool need_stall = 0; if (writemask == 0) { @@ -2322,56 +1543,56 @@ void brw_SAMPLE(struct brw_compile *p, * needed. 
*/ if (writemask != WRITEMASK_XYZW) { - GLuint dst_offset = 0; - GLuint i, newmask = 0, len = 0; + uint32_t dst_offset = 0; + uint32_t i, newmask = 0, len = 0; for (i = 0; i < 4; i++) { - if (writemask & (1<current, dest); - if (p->current->header.execution_size == BRW_EXECUTE_16) - dispatch_16 = true; + guess_execution_size(p, p->current, dest); + if (p->current->header.execution_size == BRW_EXECUTE_16) + dispatch_16 = true; - newmask = ~newmask & WRITEMASK_XYZW; + newmask = ~newmask & WRITEMASK_XYZW; - brw_push_insn_state(p); + brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD), - retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); + brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD), + retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); - brw_pop_insn_state(p); + brw_pop_insn_state(p); - src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); - dest = offset(dest, dst_offset); + src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + dest = offset(dest, dst_offset); - /* For 16-wide dispatch, masked channels are skipped in the - * response. For 8-wide, masked channels still take up slots, - * and are just not written to. - */ - if (dispatch_16) - response_length = len * 2; + /* For 16-wide dispatch, masked channels are skipped in the + * response. For 8-wide, masked channels still take up slots, + * and are just not written to. 
+ */ + if (dispatch_16) + response_length = len * 2; } } @@ -2383,20 +1604,20 @@ void brw_SAMPLE(struct brw_compile *p, insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - if (intel->gen < 6) - insn->header.destreg__conditionalmod = msg_reg_nr; + if (p->gen < 6) + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(p, insn, dest); brw_set_src0(p, insn, src0); brw_set_sampler_message(p, insn, - binding_table_index, - sampler, - msg_type, - response_length, - msg_length, - header_present, - simd_mode, - return_format); + binding_table_index, + sampler, + msg_type, + response_length, + msg_length, + header_present, + simd_mode, + return_format); } if (need_stall) { @@ -2407,238 +1628,9 @@ void brw_SAMPLE(struct brw_compile *p, brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD), - retype(reg, BRW_REGISTER_TYPE_UD)); + retype(reg, BRW_REGISTER_TYPE_UD)); brw_pop_insn_state(p); } } -/* All these variables are pretty confusing - we might be better off - * using bitmasks and macros for this, in the old style. Or perhaps - * just having the caller instantiate the fields in dword3 itself. 
- */ -void brw_urb_WRITE(struct brw_compile *p, - struct brw_reg dest, - GLuint msg_reg_nr, - struct brw_reg src0, - bool allocate, - bool used, - GLuint msg_length, - GLuint response_length, - bool eot, - bool writes_complete, - GLuint offset, - GLuint swizzle) -{ - struct intel_context *intel = &p->brw->intel; - struct brw_instruction *insn; - - gen6_resolve_implied_move(p, &src0, msg_reg_nr); - - if (intel->gen == 7) { - /* Enable Channel Masks in the URB_WRITE_HWORD message header */ - brw_push_insn_state(p); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5), - BRW_REGISTER_TYPE_UD), - retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD), - brw_imm_ud(0xff00)); - brw_pop_insn_state(p); - } - - insn = next_insn(p, BRW_OPCODE_SEND); - - assert(msg_length < BRW_MAX_MRF); - - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, src0); - brw_set_src1(p, insn, brw_imm_d(0)); - - if (intel->gen < 6) - insn->header.destreg__conditionalmod = msg_reg_nr; - - brw_set_urb_message(p, - insn, - allocate, - used, - msg_length, - response_length, - eot, - writes_complete, - offset, - swizzle); -} - -static int -brw_find_next_block_end(struct brw_compile *p, int start) -{ - int ip; - - for (ip = start + 1; ip < p->nr_insn; ip++) { - struct brw_instruction *insn = &p->store[ip]; - - switch (insn->header.opcode) { - case BRW_OPCODE_ENDIF: - case BRW_OPCODE_ELSE: - case BRW_OPCODE_WHILE: - return ip; - } - } - - return 0; -} - -/* There is no DO instruction on gen6, so to find the end of the loop - * we have to see if the loop is jumping back before our start - * instruction. - */ -static int -brw_find_loop_end(struct brw_compile *p, int start) -{ - struct intel_context *intel = &p->brw->intel; - int ip; - int br = 2; - - for (ip = start + 1; ip < p->nr_insn; ip++) { - struct brw_instruction *insn = &p->store[ip]; - - if (insn->header.opcode == BRW_OPCODE_WHILE) { - int jip = intel->gen == 6 ? 
insn->bits1.branch_gen6.jump_count - : insn->bits3.break_cont.jip; - if (ip + jip / br <= start) - return ip; - } - } - assert(!"not reached"); - return start + 1; -} - -/* After program generation, go back and update the UIP and JIP of - * BREAK, CONT, and HALT instructions to their correct locations. - */ -void -brw_set_uip_jip(struct brw_compile *p) -{ - struct intel_context *intel = &p->brw->intel; - int ip; - int br = 2; - - if (intel->gen < 6) - return; - - for (ip = 0; ip < p->nr_insn; ip++) { - struct brw_instruction *insn = &p->store[ip]; - int block_end_ip = 0; - - if (insn->header.opcode == BRW_OPCODE_BREAK || - insn->header.opcode == BRW_OPCODE_CONTINUE || - insn->header.opcode == BRW_OPCODE_HALT) { - block_end_ip = brw_find_next_block_end(p, ip); - } - - switch (insn->header.opcode) { - case BRW_OPCODE_BREAK: - assert(block_end_ip != 0); - insn->bits3.break_cont.jip = br * (block_end_ip - ip); - /* Gen7 UIP points to WHILE; Gen6 points just after it */ - insn->bits3.break_cont.uip = - br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0)); - break; - case BRW_OPCODE_CONTINUE: - assert(block_end_ip != 0); - insn->bits3.break_cont.jip = br * (block_end_ip - ip); - insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip); - - assert(insn->bits3.break_cont.uip != 0); - assert(insn->bits3.break_cont.jip != 0); - break; - case BRW_OPCODE_HALT: - /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19): - * - * "In case of the halt instruction not inside any conditional code - * block, the value of and should be the same. In case - * of the halt instruction inside conditional code block, the - * should be the end of the program, and the should be end of - * the most inner conditional code block." - * - * The uip will have already been set by whoever set up the - * instruction. 
- */ - if (block_end_ip == 0) { - insn->bits3.break_cont.jip = insn->bits3.break_cont.uip; - } else { - insn->bits3.break_cont.jip = br * (block_end_ip - ip); - } - assert(insn->bits3.break_cont.uip != 0); - assert(insn->bits3.break_cont.jip != 0); - break; - } - } -} - -void brw_ff_sync(struct brw_compile *p, - struct brw_reg dest, - GLuint msg_reg_nr, - struct brw_reg src0, - bool allocate, - GLuint response_length, - bool eot) -{ - struct intel_context *intel = &p->brw->intel; - struct brw_instruction *insn; - - gen6_resolve_implied_move(p, &src0, msg_reg_nr); - - insn = next_insn(p, BRW_OPCODE_SEND); - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, src0); - brw_set_src1(p, insn, brw_imm_d(0)); - - if (intel->gen < 6) - insn->header.destreg__conditionalmod = msg_reg_nr; - - brw_set_ff_sync_message(p, - insn, - allocate, - response_length, - eot); -} - -/** - * Emit the SEND instruction necessary to generate stream output data on Gen6 - * (for transform feedback). - * - * If send_commit_msg is true, this is the last piece of stream output data - * from this thread, so send the data as a committed write. According to the - * Sandy Bridge PRM (volume 2 part 1, section 4.5.1): - * - * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all - * writes are complete by sending the final write as a committed write." 
- */ -void -brw_svb_write(struct brw_compile *p, - struct brw_reg dest, - GLuint msg_reg_nr, - struct brw_reg src0, - GLuint binding_table_index, - bool send_commit_msg) -{ - struct brw_instruction *insn; - - gen6_resolve_implied_move(p, &src0, msg_reg_nr); - - insn = next_insn(p, BRW_OPCODE_SEND); - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, src0); - brw_set_src1(p, insn, brw_imm_d(0)); - brw_set_dp_write_message(p, insn, - binding_table_index, - 0, /* msg_control: ignored */ - GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, - 1, /* msg_length */ - true, /* header_present */ - 0, /* last_render_target: ignored */ - send_commit_msg, /* response_length */ - 0, /* end_of_thread */ - send_commit_msg); /* send_commit_msg */ -} diff --git a/backend/src/gen/brw_structs.h b/backend/src/gen/brw_structs.h new file mode 100644 index 0000000..ca9071a --- /dev/null +++ b/backend/src/gen/brw_structs.h @@ -0,0 +1,758 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . + * + * Author: Benjamin Segovia + */ + + /* + * Authors: + * Keith Whitwell + */ +#ifndef BRW_STRUCTS_H +#define BRW_STRUCTS_H + +#include + +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 + +/** + * First GRF used for the MRF hack. + * + * On gen7, MRFs are no longer used, and contiguous GRFs are used instead. 
We + * haven't converted our compiler to be aware of this, so it asks for MRFs and + * brw_eu_emit.c quietly converts them to be accesses of the top GRFs. The + * register allocators have to be careful of this to avoid corrupting the "MRF"s + * with actual GRF allocations. + */ +#define GEN7_MRF_HACK_START 112. + +/** Number of message register file registers */ +#define BRW_MAX_MRF 16 + +struct brw_urb_immediate { + uint32_t opcode:4; + uint32_t offset:6; + uint32_t swizzle_control:2; + uint32_t pad:1; + uint32_t allocate:1; + uint32_t used:1; + uint32_t complete:1; + uint32_t response_length:4; + uint32_t msg_length:4; + uint32_t msg_target:4; + uint32_t pad1:3; + uint32_t end_of_thread:1; +}; + +struct brw_sampler_state +{ + struct + { + uint32_t shadow_function:3; + uint32_t lod_bias:11; + uint32_t min_filter:3; + uint32_t mag_filter:3; + uint32_t mip_filter:2; + uint32_t base_level:5; + uint32_t min_mag_neq:1; + uint32_t lod_preclamp:1; + uint32_t default_color_mode:1; + uint32_t pad0:1; + uint32_t disable:1; + } ss0; + + struct + { + uint32_t r_wrap_mode:3; + uint32_t t_wrap_mode:3; + uint32_t s_wrap_mode:3; + uint32_t cube_control_mode:1; + uint32_t pad:2; + uint32_t max_lod:10; + uint32_t min_lod:10; + } ss1; + + struct + { + uint32_t pad:5; + uint32_t default_color_pointer:27; + } ss2; + + struct + { + uint32_t non_normalized_coord:1; + uint32_t pad:12; + uint32_t address_round:6; + uint32_t max_aniso:3; + uint32_t chroma_key_mode:1; + uint32_t chroma_key_index:2; + uint32_t chroma_key_enable:1; + uint32_t monochrome_filter_width:3; + uint32_t monochrome_filter_height:3; + } ss3; +}; + +struct gen7_sampler_state +{ + struct + { + uint32_t aniso_algorithm:1; + uint32_t lod_bias:13; + uint32_t min_filter:3; + uint32_t mag_filter:3; + uint32_t mip_filter:2; + uint32_t base_level:5; + uint32_t pad1:1; + uint32_t lod_preclamp:1; + uint32_t default_color_mode:1; + uint32_t pad0:1; + uint32_t disable:1; + } ss0; + + struct + { + uint32_t cube_control_mode:1; + 
uint32_t shadow_function:3; + uint32_t pad:4; + uint32_t max_lod:12; + uint32_t min_lod:12; + } ss1; + + struct + { + uint32_t pad:5; + uint32_t default_color_pointer:27; + } ss2; + + struct + { + uint32_t r_wrap_mode:3; + uint32_t t_wrap_mode:3; + uint32_t s_wrap_mode:3; + uint32_t pad:1; + uint32_t non_normalized_coord:1; + uint32_t trilinear_quality:2; + uint32_t address_round:6; + uint32_t max_aniso:3; + uint32_t chroma_key_mode:1; + uint32_t chroma_key_index:2; + uint32_t chroma_key_enable:1; + uint32_t pad0:6; + } ss3; +}; + +/* Instruction format for the execution units */ +struct brw_instruction +{ + struct + { + uint32_t opcode:7; + uint32_t pad:1; + uint32_t access_mode:1; + uint32_t mask_control:1; + uint32_t dependency_control:2; + uint32_t compression_control:2; /* gen6: quater control */ + uint32_t thread_control:2; + uint32_t predicate_control:4; + uint32_t predicate_inverse:1; + uint32_t execution_size:3; + /** + * Conditional Modifier for most instructions. On Gen6+, this is also + * used for the SEND instruction's Message Target/SFID. 
+ */ + uint32_t destreg__conditionalmod:4; + uint32_t acc_wr_control:1; + uint32_t cmpt_control:1; + uint32_t debug_control:1; + uint32_t saturate:1; + } header; + + union { + struct + { + uint32_t dest_reg_file:2; + uint32_t dest_reg_type:3; + uint32_t src0_reg_file:2; + uint32_t src0_reg_type:3; + uint32_t src1_reg_file:2; + uint32_t src1_reg_type:3; + uint32_t pad:1; + uint32_t dest_subreg_nr:5; + uint32_t dest_reg_nr:8; + uint32_t dest_horiz_stride:2; + uint32_t dest_address_mode:1; + } da1; + + struct + { + uint32_t dest_reg_file:2; + uint32_t dest_reg_type:3; + uint32_t src0_reg_file:2; + uint32_t src0_reg_type:3; + uint32_t src1_reg_file:2; /* 0x00000c00 */ + uint32_t src1_reg_type:3; /* 0x00007000 */ + uint32_t pad:1; + int dest_indirect_offset:10; /* offset against the deref'd address reg */ + uint32_t dest_subreg_nr:3; /* subnr for the address reg a0.x */ + uint32_t dest_horiz_stride:2; + uint32_t dest_address_mode:1; + } ia1; + + struct + { + uint32_t dest_reg_file:2; + uint32_t dest_reg_type:3; + uint32_t src0_reg_file:2; + uint32_t src0_reg_type:3; + uint32_t src1_reg_file:2; + uint32_t src1_reg_type:3; + uint32_t pad:1; + uint32_t dest_writemask:4; + uint32_t dest_subreg_nr:1; + uint32_t dest_reg_nr:8; + uint32_t dest_horiz_stride:2; + uint32_t dest_address_mode:1; + } da16; + + struct + { + uint32_t dest_reg_file:2; + uint32_t dest_reg_type:3; + uint32_t src0_reg_file:2; + uint32_t src0_reg_type:3; + uint32_t pad0:6; + uint32_t dest_writemask:4; + int dest_indirect_offset:6; + uint32_t dest_subreg_nr:3; + uint32_t dest_horiz_stride:2; + uint32_t dest_address_mode:1; + } ia16; + + struct { + uint32_t dest_reg_file:2; + uint32_t dest_reg_type:3; + uint32_t src0_reg_file:2; + uint32_t src0_reg_type:3; + uint32_t src1_reg_file:2; + uint32_t src1_reg_type:3; + uint32_t pad:1; + + int jump_count:16; + } branch_gen6; + + struct { + uint32_t dest_reg_file:1; + uint32_t flag_subreg_num:1; + uint32_t pad0:2; + uint32_t src0_abs:1; + uint32_t src0_negate:1; + 
uint32_t src1_abs:1; + uint32_t src1_negate:1; + uint32_t src2_abs:1; + uint32_t src2_negate:1; + uint32_t pad1:7; + uint32_t dest_writemask:4; + uint32_t dest_subreg_nr:3; + uint32_t dest_reg_nr:8; + } da3src; + } bits1; + + + union { + struct + { + uint32_t src0_subreg_nr:5; + uint32_t src0_reg_nr:8; + uint32_t src0_abs:1; + uint32_t src0_negate:1; + uint32_t src0_address_mode:1; + uint32_t src0_horiz_stride:2; + uint32_t src0_width:3; + uint32_t src0_vert_stride:4; + uint32_t flag_reg_nr:1; + uint32_t pad:6; + } da1; + + struct + { + int src0_indirect_offset:10; + uint32_t src0_subreg_nr:3; + uint32_t src0_abs:1; + uint32_t src0_negate:1; + uint32_t src0_address_mode:1; + uint32_t src0_horiz_stride:2; + uint32_t src0_width:3; + uint32_t src0_vert_stride:4; + uint32_t flag_reg_nr:1; + uint32_t pad:6; + } ia1; + + struct + { + uint32_t src0_swz_x:2; + uint32_t src0_swz_y:2; + uint32_t src0_subreg_nr:1; + uint32_t src0_reg_nr:8; + uint32_t src0_abs:1; + uint32_t src0_negate:1; + uint32_t src0_address_mode:1; + uint32_t src0_swz_z:2; + uint32_t src0_swz_w:2; + uint32_t pad0:1; + uint32_t src0_vert_stride:4; + uint32_t flag_reg_nr:1; + uint32_t pad1:6; + } da16; + + struct + { + uint32_t src0_swz_x:2; + uint32_t src0_swz_y:2; + int src0_indirect_offset:6; + uint32_t src0_subreg_nr:3; + uint32_t src0_abs:1; + uint32_t src0_negate:1; + uint32_t src0_address_mode:1; + uint32_t src0_swz_z:2; + uint32_t src0_swz_w:2; + uint32_t pad0:1; + uint32_t src0_vert_stride:4; + uint32_t flag_reg_nr:1; + uint32_t pad1:6; + } ia16; + + /* Extended Message Descriptor for Ironlake (Gen5) SEND instruction. + * + * Does not apply to Gen6+. The SFID/message target moved to bits + * 27:24 of the header (destreg__conditionalmod); EOT is in bits3. 
+ */ + struct + { + uint32_t pad:26; + uint32_t end_of_thread:1; + uint32_t pad1:1; + uint32_t sfid:4; + } send_gen5; /* for Ironlake only */ + + struct { + uint32_t src0_rep_ctrl:1; + uint32_t src0_swizzle:8; + uint32_t src0_subreg_nr:3; + uint32_t src0_reg_nr:8; + uint32_t pad0:1; + uint32_t src1_rep_ctrl:1; + uint32_t src1_swizzle:8; + uint32_t src1_subreg_nr_low:2; + } da3src; + } bits2; + + union + { + struct + { + uint32_t src1_subreg_nr:5; + uint32_t src1_reg_nr:8; + uint32_t src1_abs:1; + uint32_t src1_negate:1; + uint32_t src1_address_mode:1; + uint32_t src1_horiz_stride:2; + uint32_t src1_width:3; + uint32_t src1_vert_stride:4; + uint32_t pad0:7; + } da1; + + struct + { + uint32_t src1_swz_x:2; + uint32_t src1_swz_y:2; + uint32_t src1_subreg_nr:1; + uint32_t src1_reg_nr:8; + uint32_t src1_abs:1; + uint32_t src1_negate:1; + uint32_t src1_address_mode:1; + uint32_t src1_swz_z:2; + uint32_t src1_swz_w:2; + uint32_t pad1:1; + uint32_t src1_vert_stride:4; + uint32_t pad2:7; + } da16; + + struct + { + int src1_indirect_offset:10; + uint32_t src1_subreg_nr:3; + uint32_t src1_abs:1; + uint32_t src1_negate:1; + uint32_t src1_address_mode:1; + uint32_t src1_horiz_stride:2; + uint32_t src1_width:3; + uint32_t src1_vert_stride:4; + uint32_t flag_reg_nr:1; + uint32_t pad1:6; + } ia1; + + struct + { + uint32_t src1_swz_x:2; + uint32_t src1_swz_y:2; + int src1_indirect_offset:6; + uint32_t src1_subreg_nr:3; + uint32_t src1_abs:1; + uint32_t src1_negate:1; + uint32_t pad0:1; + uint32_t src1_swz_z:2; + uint32_t src1_swz_w:2; + uint32_t pad1:1; + uint32_t src1_vert_stride:4; + uint32_t flag_reg_nr:1; + uint32_t pad2:6; + } ia16; + + + struct + { + int jump_count:16; /* note: signed */ + uint32_t pop_count:4; + uint32_t pad0:12; + } if_else; + + /* This is also used for gen7 IF/ELSE instructions */ + struct + { + /* Signed jump distance to the ip to jump to if all channels + * are disabled after the break or continue. 
It should point + * to the end of the innermost control flow block, as that's + * where some channel could get re-enabled. + */ + int jip:16; + + /* Signed jump distance to the location to resume execution + * of this channel if it's enabled for the break or continue. + */ + int uip:16; + } break_cont; + + /** + * \defgroup SEND instructions / Message Descriptors + * + * @{ + */ + + /** + * Generic Message Descriptor for Gen4 SEND instructions. The structs + * below expand function_control to something specific for their + * message. Due to struct packing issues, they duplicate these bits. + * + * See the G45 PRM, Volume 4, Table 14-15. + */ + struct { + uint32_t function_control:16; + uint32_t response_length:4; + uint32_t msg_length:4; + uint32_t msg_target:4; + uint32_t pad1:3; + uint32_t end_of_thread:1; + } generic; + + /** + * Generic Message Descriptor for Gen5-7 SEND instructions. + * + * See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most + * of the information on the SEND instruction is missing from the public + * Ironlake PRM.) + * + * The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies. + * According to the SEND instruction description: + * "The MSb of the message description, the EOT field, always comes from + * bit 127 of the instruction word"...which is bit 31 of this field. 
+ */ + struct { + uint32_t function_control:19; + uint32_t header_present:1; + uint32_t response_length:5; + uint32_t msg_length:4; + uint32_t pad1:2; + uint32_t end_of_thread:1; + } generic_gen5; + + /** G45 PRM, Volume 4, Section 6.1.1.1 */ + struct { + uint32_t function:4; + uint32_t int_type:1; + uint32_t precision:1; + uint32_t saturate:1; + uint32_t data_type:1; + uint32_t pad0:8; + uint32_t response_length:4; + uint32_t msg_length:4; + uint32_t msg_target:4; + uint32_t pad1:3; + uint32_t end_of_thread:1; + } math; + + /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */ + struct { + uint32_t function:4; + uint32_t int_type:1; + uint32_t precision:1; + uint32_t saturate:1; + uint32_t data_type:1; + uint32_t snapshot:1; + uint32_t pad0:10; + uint32_t header_present:1; + uint32_t response_length:5; + uint32_t msg_length:4; + uint32_t pad1:2; + uint32_t end_of_thread:1; + } math_gen5; + + /** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */ + struct { + uint32_t binding_table_index:8; + uint32_t sampler:4; + uint32_t return_format:2; + uint32_t msg_type:2; + uint32_t response_length:4; + uint32_t msg_length:4; + uint32_t msg_target:4; + uint32_t pad1:3; + uint32_t end_of_thread:1; + } sampler; + + /** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */ + struct { + uint32_t binding_table_index:8; + uint32_t sampler:4; + uint32_t msg_type:4; + uint32_t response_length:4; + uint32_t msg_length:4; + uint32_t msg_target:4; + uint32_t pad1:3; + uint32_t end_of_thread:1; + } sampler_g4x; + + /** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */ + struct { + uint32_t binding_table_index:8; + uint32_t sampler:4; + uint32_t msg_type:4; + uint32_t simd_mode:2; + uint32_t pad0:1; + uint32_t header_present:1; + uint32_t response_length:5; + uint32_t msg_length:4; + uint32_t pad1:2; + uint32_t end_of_thread:1; + } sampler_gen5; + + struct { + uint32_t binding_table_index:8; + uint32_t sampler:4; + uint32_t msg_type:5; + uint32_t simd_mode:2; + uint32_t 
header_present:1; + uint32_t response_length:5; + uint32_t msg_length:4; + uint32_t pad1:2; + uint32_t end_of_thread:1; + } sampler_gen7; + + struct brw_urb_immediate urb; + + struct { + uint32_t opcode:4; + uint32_t offset:6; + uint32_t swizzle_control:2; + uint32_t pad:1; + uint32_t allocate:1; + uint32_t used:1; + uint32_t complete:1; + uint32_t pad0:3; + uint32_t header_present:1; + uint32_t response_length:5; + uint32_t msg_length:4; + uint32_t pad1:2; + uint32_t end_of_thread:1; + } urb_gen5; + + struct { + uint32_t opcode:3; + uint32_t offset:11; + uint32_t swizzle_control:1; + uint32_t complete:1; + uint32_t per_slot_offset:1; + uint32_t pad0:2; + uint32_t header_present:1; + uint32_t response_length:5; + uint32_t msg_length:4; + uint32_t pad1:2; + uint32_t end_of_thread:1; + } urb_gen7; + + /** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */ + struct { + uint32_t binding_table_index:8; + uint32_t msg_control:4; + uint32_t msg_type:2; + uint32_t target_cache:2; + uint32_t response_length:4; + uint32_t msg_length:4; + uint32_t msg_target:4; + uint32_t pad1:3; + uint32_t end_of_thread:1; + } dp_read; + + /** G45 PRM, Volume 4, Section 5.10.1.1.2 */ + struct { + uint32_t binding_table_index:8; + uint32_t msg_control:3; + uint32_t msg_type:3; + uint32_t target_cache:2; + uint32_t response_length:4; + uint32_t msg_length:4; + uint32_t msg_target:4; + uint32_t pad1:3; + uint32_t end_of_thread:1; + } dp_read_g4x; + + /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ + struct { + uint32_t binding_table_index:8; + uint32_t msg_control:3; + uint32_t msg_type:3; + uint32_t target_cache:2; + uint32_t pad0:3; + uint32_t header_present:1; + uint32_t response_length:5; + uint32_t msg_length:4; + uint32_t pad1:2; + uint32_t end_of_thread:1; + } dp_read_gen5; + + /** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. 
*/ + struct { + uint32_t binding_table_index:8; + uint32_t msg_control:3; + uint32_t last_render_target:1; + uint32_t msg_type:3; + uint32_t send_commit_msg:1; + uint32_t response_length:4; + uint32_t msg_length:4; + uint32_t msg_target:4; + uint32_t pad1:3; + uint32_t end_of_thread:1; + } dp_write; + + /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ + struct { + uint32_t binding_table_index:8; + uint32_t msg_control:3; + uint32_t last_render_target:1; + uint32_t msg_type:3; + uint32_t send_commit_msg:1; + uint32_t pad0:3; + uint32_t header_present:1; + uint32_t response_length:5; + uint32_t msg_length:4; + uint32_t pad1:2; + uint32_t end_of_thread:1; + } dp_write_gen5; + + /** + * Message for the Sandybridge Sampler Cache or Constant Cache Data Port. + * + * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1. + **/ + struct { + uint32_t binding_table_index:8; + uint32_t msg_control:5; + uint32_t msg_type:3; + uint32_t pad0:3; + uint32_t header_present:1; + uint32_t response_length:5; + uint32_t msg_length:4; + uint32_t pad1:2; + uint32_t end_of_thread:1; + } gen6_dp_sampler_const_cache; + + /** + * Message for the Sandybridge Render Cache Data Port. + * + * Most fields are defined in the Sandybridge PRM, Volume 4 Part 1, + * Section 3.9.2.1.1: Message Descriptor. + * + * "Slot Group Select" and "Last Render Target" are part of the + * 5-bit message control for Render Target Write messages. See + * Section 3.9.9.2.1 of the same volume. + */ + struct { + uint32_t binding_table_index:8; + uint32_t msg_control:3; + uint32_t slot_group_select:1; + uint32_t last_render_target:1; + uint32_t msg_type:4; + uint32_t send_commit_msg:1; + uint32_t pad0:1; + uint32_t header_present:1; + uint32_t response_length:5; + uint32_t msg_length:4; + uint32_t pad1:2; + uint32_t end_of_thread:1; + } gen6_dp; + + /** + * Message for any of the Gen7 Data Port caches. 
+ * + * Most fields are defined in BSpec volume 5c.2 Data Port / Messages / + * Data Port Messages / Message Descriptor. Once again, "Slot Group + * Select" and "Last Render Target" are part of the 6-bit message + * control for Render Target Writes. + */ + struct { + uint32_t binding_table_index:8; + uint32_t msg_control:3; + uint32_t slot_group_select:1; + uint32_t last_render_target:1; + uint32_t msg_control_pad:1; + uint32_t msg_type:4; + uint32_t pad1:1; + uint32_t header_present:1; + uint32_t response_length:5; + uint32_t msg_length:4; + uint32_t pad2:2; + uint32_t end_of_thread:1; + } gen7_dp; + /** @} */ + + struct { + uint32_t src1_subreg_nr_high:1; + uint32_t src1_reg_nr:8; + uint32_t pad0:1; + uint32_t src2_rep_ctrl:1; + uint32_t src2_swizzle:8; + uint32_t src2_subreg_nr:3; + uint32_t src2_reg_nr:8; + uint32_t pad1:2; + } da3src; + + int d; + uint32_t ud; + float f; + } bits3; +}; + +#endif diff --git a/backend/src/gen/program.cpp b/backend/src/gen/program.cpp new file mode 100644 index 0000000..324c206 --- /dev/null +++ b/backend/src/gen/program.cpp @@ -0,0 +1,70 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . 
+ * + * Author: Benjamin Segovia + */ + +/** + * \file program.cpp + * \author Benjamin Segovia + */ + +#include "gen/program.h" +#include "gen/program.hpp" +#include "ir/liveness.hpp" +#include "ir/value.hpp" +#include "ir/unit.hpp" +#include "llvm/llvm_to_gen.hpp" + +namespace gbe { +namespace gen { + + Kernel::Kernel(void) : + args(NULL), insns(NULL), argNum(0), insnNum(0), liveness(NULL), dag(NULL) + {} + Kernel::~Kernel(void) { /* Releases insns, args, liveness and dag through the GBE_SAFE_DELETE macros */ + GBE_SAFE_DELETE_ARRAY(insns); + GBE_SAFE_DELETE_ARRAY(args); + GBE_SAFE_DELETE(liveness); + GBE_SAFE_DELETE(dag); + } + + Program::Program(void) {} + Program::~Program(void) { /* Calls GBE_DELETE on the mapped value of every entry in kernels */ + for (auto it = kernels.begin(); it != kernels.end(); ++it) + GBE_DELETE(it->second); + } + + bool Program::buildFromSource(const char *source, std::string &error) { /* Not implemented yet: source and error are unused */ + NOT_IMPLEMENTED; + return false; + } + bool Program::buildFromLLVMFile(const char *fileName, std::string &error) { /* Converts fileName into a local ir::Unit with llvmToGen, then passes it to buildFromUnit */ + ir::Unit unit; + if (llvmToGen(unit, fileName) == false) { /* NOTE(review): every llvmToGen failure is reported as not found -- confirm a parse error should read the same way */ + error = std::string(fileName) + " not found"; + return false; + } + this->buildFromUnit(unit, error); /* NOTE(review): the result is discarded, so true is returned even when buildFromUnit reports failure -- confirm this is intended */ + return true; + } + bool Program::buildFromUnit(const ir::Unit &unit, std::string &error) { /* Stub: ignores unit and error and always returns false */ + return false; + } + +} /* namespace gen */ +} /* namespace gbe */ + diff --git a/backend/src/gen/program.h b/backend/src/gen/program.h new file mode 100644 index 0000000..bddcc76 --- /dev/null +++ b/backend/src/gen/program.h @@ -0,0 +1,87 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . + * + * Author: Benjamin Segovia + */ + +/** + * \file program.h + * \author Benjamin Segovia + * + * C-like interface for the gen kernels and programs + */ + +#ifndef __GBE_GEN_PROGRAM_H__ +#define __GBE_GEN_PROGRAM_H__ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/*! Opaque structure that interfaces a Gen program */ +typedef struct GenProgram GenProgram; + +/*! Opaque structure that interfaces a Gen kernel (ie one OCL function) */ +typedef struct GenKernel GenKernel; + +/*! Kind of each kernel argument. NOTE(review): GenKernelGetArgType below names this type without the enum keyword, which a C compiler rejects unless a typedef exists -- confirm whether C callers are expected */ +enum GenArgType { + GEN_ARG_VALUE = 0, // int, float and so on + GEN_ARG_GLOBAL_PTR = 1, // __global, __constant + GEN_ARG_STRUCTURE = 2, // By value structure + GEN_ARG_IMAGE = 3, // image2d_t, image3d_t + GEN_ARG_INVALUE = 0xffffffff /* NOTE(review): presumably a typo for GEN_ARG_INVALID; renaming changes the public enum -- confirm */ +}; + +/*! Create a new program from the given source code (zero terminated string) */ +GenProgram *GenProgramNewFromSource(const char *source); + +/*! Create a new program from the given blob */ +GenProgram *GenProgramNewFromBinary(const char *binary, size_t size); + +/*! Destroy and deallocate the given program */ +void GenProgramDelete(GenProgram *program); + +/*! Get the number of functions in the program */ +uint32_t GenProgramGetKernelNum(const GenProgram *program); + +/*! Get the kernel from its name. NOTE(review): this returns the opaque GenKernel by value while every other entry point takes a const GenKernel pointer -- presumably a pointer return was meant; confirm */ +const GenKernel GenProgramGetKernel(const GenProgram *program, const char *name); + +/*! Get the Gen ISA source code */ +const char *GenKernelGetCode(const GenKernel *kernel); + +/*! Get the size of the source code (the const on the by-value return type has no effect) */ +const size_t GenKernelGetCodeSize(const GenKernel *kernel); + +/*! Get the total number of arguments */ +uint32_t GenKernelGetArgNum(const GenKernel *kernel); + +/*! Get the size of the given argument */ +uint32_t GenKernelGetArgSize(const GenKernel *kernel, uint32_t argID); + +/*! 
Get the type of the given argument */ +GenArgType GenKernelGetArgType(const GenKernel *kernel, uint32_t argID); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __GBE_GEN_PROGRAM_H__ */ + diff --git a/backend/src/gen/program.hpp b/backend/src/gen/program.hpp new file mode 100644 index 0000000..654e025 --- /dev/null +++ b/backend/src/gen/program.hpp @@ -0,0 +1,90 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . + * + * Author: Benjamin Segovia + */ + +/** + * \file program.hpp + * \author Benjamin Segovia + */ + +#ifndef __GBE_GEN_PROGRAM_HPP__ +#define __GBE_GEN_PROGRAM_HPP__ + +#include "gen/brw_structs.h" +#include "sys/hash_map.hpp" +#include + +namespace gbe { +namespace ir { + + class Unit; // Compilation unit. Contains the program to compile + class Liveness; // Describes liveness of each ir function register + class FunctionDAG; // Describes the instruction dependencies + +} /* namespace ir */ +} /* namespace gbe */ + +namespace gbe { +namespace gen { + + struct KernelArgument + { + GenArgType type; //!< Pointer, structure, regular value? + size_t size; //!< Size of this argument + }; + + /*! Describe a compiled kernel */ + struct Kernel : public NonCopyable + { + /*! Create an empty kernel. The constructor takes no argument, so no name is supplied here */ + Kernel(void); + /*! 
Destroy it */ + ~Kernel(void); + + std::string name; //!< Kernel name + KernelArgument *args; //!< Argument array, released by the destructor + brw_instruction *insns; //!< Instruction stream, released by the destructor + uint32_t argNum; //!< Number of function arguments + uint32_t insnNum; //!< Number of instructions + ir::Liveness *liveness; //!< Used only for the build + ir::FunctionDAG *dag; //!< Used only for the build + GBE_STRUCT(Kernel); //!< Use gbe allocators + }; + + /*! Describe a compiled program */ + struct Program : public NonCopyable + { + /*! Create an empty program */ + Program(void); + /*! Destroy the program */ + ~Program(void); + /*! Build a program from an ir::Unit; error is the output string for a failure message */ + bool buildFromUnit(const ir::Unit &unit, std::string &error); + /*! Builds a program from the LLVM file named by fileName */ + bool buildFromLLVMFile(const char *fileName, std::string &error); + /*! Builds a program from an OCL source string */ + bool buildFromSource(const char *source, std::string &error); + /*! Kernels looked up by their name (a hash_map, so not sorted); the destructor deletes each stored kernel */ + hash_map kernels; + }; + +} /* namespace gen */ +} /* namespace gbe */ + +#endif /* __GBE_GEN_PROGRAM_HPP__ */ + diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp index 6cf73bc..802aed2 100644 --- a/backend/src/llvm/llvm_to_gen.cpp +++ b/backend/src/llvm/llvm_to_gen.cpp @@ -35,7 +35,7 @@ namespace gbe { - void llvmToGen(ir::Unit &unit, const char *fileName) + bool llvmToGen(ir::Unit &unit, const char *fileName) { using namespace llvm; // Get the global LLVM context @@ -45,7 +45,7 @@ namespace gbe SMDiagnostic Err; std::auto_ptr M; M.reset(ParseIRFile(fileName, Err, c)); - GBE_ASSERT (M.get() != 0); + if (M.get() == 0) return false; Module &mod = *M.get(); llvm::PassManager passes; @@ -58,6 +58,7 @@ namespace gbe passes.add(createGVNPass()); // Remove redundancies passes.add(createGenPass(unit)); passes.run(mod); + return true; } } /* namespace gbe */ diff --git a/backend/src/llvm/llvm_to_gen.hpp b/backend/src/llvm/llvm_to_gen.hpp index 84fa559..4006667 100644 --- 
a/backend/src/llvm/llvm_to_gen.hpp +++ b/backend/src/llvm/llvm_to_gen.hpp @@ -31,7 +31,7 @@ namespace gbe { } /* namespace ir */ /*! Convert the LLVM IR code to a GEN IR code */ - void llvmToGen(ir::Unit &unit, const char *fileName); + bool llvmToGen(ir::Unit &unit, const char *fileName); } /* namespace gbe */ -- 2.7.4