From cdde031ac2c8124721655532ee6f4149e20e9c61 Mon Sep 17 00:00:00 2001
From: Dylan Baker
Date: Mon, 12 Apr 2021 11:26:45 -0700
Subject: [PATCH] classic/i965: Remove driver

Reviewed-by: Emma Anholt
Acked-by: Jason Ekstrand
Acked-by: Kenneth Graunke
Reviewed-by: Adam Jackson
Part-of:
---
 .gitlab-ci.yml | 4 -
 .gitlab-ci/meson/build.sh | 1 -
 include/pci_ids/i965_pci_ids.h | 260 -
 meson.build | 28 +-
 meson_options.txt | 16 +-
 src/intel/dev/intel_device_info.c | 4 +-
 src/intel/dev/intel_device_info_test.c | 7 +-
 src/loader/meson.build | 9 -
 src/loader/pci_id_driver_map.h | 7 -
 src/mesa/drivers/dri/i965/brw_batch.c | 1332 -----
 src/mesa/drivers/dri/i965/brw_batch.h | 166 -
 src/mesa/drivers/dri/i965/brw_binding_tables.c | 307 -
 src/mesa/drivers/dri/i965/brw_blit.c | 790 ---
 src/mesa/drivers/dri/i965/brw_blit.h | 65 -
 src/mesa/drivers/dri/i965/brw_blorp.c | 1678 ------
 src/mesa/drivers/dri/i965/brw_blorp.h | 137 -
 src/mesa/drivers/dri/i965/brw_buffer_objects.c | 710 ---
 src/mesa/drivers/dri/i965/brw_buffer_objects.h | 141 -
 src/mesa/drivers/dri/i965/brw_buffers.c | 74 -
 src/mesa/drivers/dri/i965/brw_buffers.h | 35 -
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 1967 -------
 src/mesa/drivers/dri/i965/brw_bufmgr.h | 404 --
 src/mesa/drivers/dri/i965/brw_clear.c | 302 -
 src/mesa/drivers/dri/i965/brw_clip.c | 210 -
 src/mesa/drivers/dri/i965/brw_compute.c | 151 -
 src/mesa/drivers/dri/i965/brw_conditional_render.c | 193 -
 src/mesa/drivers/dri/i965/brw_context.c | 1975 -------
 src/mesa/drivers/dri/i965/brw_context.h | 1637 ------
 src/mesa/drivers/dri/i965/brw_copy_image.c | 139 -
 src/mesa/drivers/dri/i965/brw_cs.c | 220 -
 src/mesa/drivers/dri/i965/brw_cs.h | 46 -
 src/mesa/drivers/dri/i965/brw_curbe.c | 356 --
 src/mesa/drivers/dri/i965/brw_defines.h | 1668 ------
 src/mesa/drivers/dri/i965/brw_disk_cache.c | 417 --
 src/mesa/drivers/dri/i965/brw_draw.c | 1361 -----
 src/mesa/drivers/dri/i965/brw_draw.h | 87 -
 src/mesa/drivers/dri/i965/brw_draw_upload.c | 801 ---
 src/mesa/drivers/dri/i965/brw_extensions.c | 404 --
 src/mesa/drivers/dri/i965/brw_fbo.c | 1139 ----
 src/mesa/drivers/dri/i965/brw_fbo.h | 255 -
 src/mesa/drivers/dri/i965/brw_ff_gs.c | 178 -
 src/mesa/drivers/dri/i965/brw_ff_gs.h | 42 -
 src/mesa/drivers/dri/i965/brw_formatquery.c | 119 -
 src/mesa/drivers/dri/i965/brw_generate_mipmap.c | 144 -
 src/mesa/drivers/dri/i965/brw_gs.c | 256 -
 src/mesa/drivers/dri/i965/brw_gs.h | 52 -
 src/mesa/drivers/dri/i965/brw_gs_surface_state.c | 117 -
 src/mesa/drivers/dri/i965/brw_image.h | 122 -
 src/mesa/drivers/dri/i965/brw_link.cpp | 401 --
 src/mesa/drivers/dri/i965/brw_meta_util.c | 422 --
 src/mesa/drivers/dri/i965/brw_meta_util.h | 59 -
 src/mesa/drivers/dri/i965/brw_mipmap_tree.c | 3308 -----
 src/mesa/drivers/dri/i965/brw_mipmap_tree.h | 741 ---
 src/mesa/drivers/dri/i965/brw_misc_state.c | 728 ---
 src/mesa/drivers/dri/i965/brw_multisample_state.h | 111 -
 src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 450 --
 src/mesa/drivers/dri/i965/brw_object_purgeable.c | 187 -
 src/mesa/drivers/dri/i965/brw_performance_query.c | 533 --
 src/mesa/drivers/dri/i965/brw_pipe_control.c | 454 --
 src/mesa/drivers/dri/i965/brw_pipe_control.h | 95 -
 src/mesa/drivers/dri/i965/brw_pixel.c | 133 -
 src/mesa/drivers/dri/i965/brw_pixel.h | 61 -
 src/mesa/drivers/dri/i965/brw_pixel_bitmap.c | 363 --
 src/mesa/drivers/dri/i965/brw_pixel_copy.c | 212 -
 src/mesa/drivers/dri/i965/brw_pixel_draw.c | 178 -
 src/mesa/drivers/dri/i965/brw_pixel_read.c | 300 -
 src/mesa/drivers/dri/i965/brw_primitive_restart.c | 462 --
 src/mesa/drivers/dri/i965/brw_program.c | 888 ---
 src/mesa/drivers/dri/i965/brw_program.h | 145 -
 src/mesa/drivers/dri/i965/brw_program_binary.c | 353 --
 src/mesa/drivers/dri/i965/brw_program_cache.c | 523 --
 src/mesa/drivers/dri/i965/brw_queryobj.c | 621 --
 src/mesa/drivers/dri/i965/brw_reset.c | 86 -
 src/mesa/drivers/dri/i965/brw_screen.c | 2886 ----
 src/mesa/drivers/dri/i965/brw_screen.h | 173 -
 src/mesa/drivers/dri/i965/brw_sf.c | 171 -
 src/mesa/drivers/dri/i965/brw_state.c | 119 -
 src/mesa/drivers/dri/i965/brw_state.h | 370 --
 src/mesa/drivers/dri/i965/brw_state_upload.c | 789 ---
 src/mesa/drivers/dri/i965/brw_structs.h | 68 -
 src/mesa/drivers/dri/i965/brw_surface_formats.c | 558 --
 src/mesa/drivers/dri/i965/brw_sync.c | 642 ---
 src/mesa/drivers/dri/i965/brw_tcs.c | 295 -
 src/mesa/drivers/dri/i965/brw_tcs_surface_state.c | 116 -
 src/mesa/drivers/dri/i965/brw_tes.c | 233 -
 src/mesa/drivers/dri/i965/brw_tes_surface_state.c | 116 -
 src/mesa/drivers/dri/i965/brw_tex.c | 415 --
 src/mesa/drivers/dri/i965/brw_tex.h | 58 -
 src/mesa/drivers/dri/i965/brw_tex_copy.c | 72 -
 src/mesa/drivers/dri/i965/brw_tex_image.c | 992 ----
 src/mesa/drivers/dri/i965/brw_tex_obj.h | 101 -
 src/mesa/drivers/dri/i965/brw_tex_validate.c | 223 -
 src/mesa/drivers/dri/i965/brw_upload.c | 134 -
 src/mesa/drivers/dri/i965/brw_urb.c | 268 -
 src/mesa/drivers/dri/i965/brw_util.c | 125 -
 src/mesa/drivers/dri/i965/brw_util.h | 74 -
 src/mesa/drivers/dri/i965/brw_vs.c | 369 --
 src/mesa/drivers/dri/i965/brw_vs.h | 57 -
 src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 119 -
 src/mesa/drivers/dri/i965/brw_wm.c | 639 --
 src/mesa/drivers/dri/i965/brw_wm.h | 59 -
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 1692 ------
 src/mesa/drivers/dri/i965/genX_blorp_exec.c | 412 --
 src/mesa/drivers/dri/i965/genX_boilerplate.h | 160 -
 src/mesa/drivers/dri/i965/genX_pipe_control.c | 514 --
 src/mesa/drivers/dri/i965/genX_state_upload.c | 6088 --------
 src/mesa/drivers/dri/i965/gfx4_blorp_exec.h | 196 -
 src/mesa/drivers/dri/i965/gfx6_clip_state.c | 89 -
 src/mesa/drivers/dri/i965/gfx6_constant_state.c | 361 --
 src/mesa/drivers/dri/i965/gfx6_multisample_state.c | 61 -
 src/mesa/drivers/dri/i965/gfx6_queryobj.c | 560 --
 src/mesa/drivers/dri/i965/gfx6_sampler_state.c | 57 -
 src/mesa/drivers/dri/i965/gfx6_sol.c | 522 --
 src/mesa/drivers/dri/i965/gfx6_urb.c | 153 -
 src/mesa/drivers/dri/i965/gfx7_l3_state.c | 312 -
 src/mesa/drivers/dri/i965/gfx7_sol_state.c | 149 -
 src/mesa/drivers/dri/i965/gfx7_urb.c | 280 -
 src/mesa/drivers/dri/i965/gfx8_depth_state.c | 197 -
 src/mesa/drivers/dri/i965/gfx8_multisample_state.c | 55 -
 src/mesa/drivers/dri/i965/hsw_queryobj.c | 486 --
 src/mesa/drivers/dri/i965/hsw_sol.c | 264 -
 src/mesa/drivers/dri/i965/libdrm_macros.h | 59 -
 src/mesa/drivers/dri/i965/meson.build | 166 -
 src/mesa/drivers/dri/meson.build | 5 -
 124 files changed, 8 insertions(+), 55418 deletions(-)
 delete mode 100644 include/pci_ids/i965_pci_ids.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_batch.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_batch.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_binding_tables.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_blit.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_blit.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_blorp.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_blorp.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_buffer_objects.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_buffer_objects.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_buffers.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_buffers.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_bufmgr.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_bufmgr.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_clear.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_clip.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_compute.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_conditional_render.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_context.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_context.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_copy_image.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_cs.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_cs.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_curbe.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_defines.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_disk_cache.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_draw.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_draw.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_draw_upload.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_extensions.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_fbo.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_fbo.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_ff_gs.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_ff_gs.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_formatquery.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_generate_mipmap.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_gs.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_gs.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_gs_surface_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_image.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_link.cpp
 delete mode 100644 src/mesa/drivers/dri/i965/brw_meta_util.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_meta_util.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_mipmap_tree.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_mipmap_tree.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_misc_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_multisample_state.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
 delete mode 100644 src/mesa/drivers/dri/i965/brw_object_purgeable.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_performance_query.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_pipe_control.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_pipe_control.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_pixel.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_pixel.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_pixel_bitmap.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_pixel_copy.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_pixel_draw.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_pixel_read.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_primitive_restart.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_program.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_program.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_program_binary.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_program_cache.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_queryobj.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_reset.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_screen.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_screen.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_sf.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_state.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_state_upload.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_structs.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_surface_formats.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_sync.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tcs.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tcs_surface_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tes.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tes_surface_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tex.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tex.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tex_copy.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tex_image.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tex_obj.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_tex_validate.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_upload.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_urb.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_util.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_util.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_vs.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_vs.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_vs_surface_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_wm.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_wm.h
 delete mode 100644 src/mesa/drivers/dri/i965/brw_wm_surface_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/genX_blorp_exec.c
 delete mode 100644 src/mesa/drivers/dri/i965/genX_boilerplate.h
 delete mode 100644 src/mesa/drivers/dri/i965/genX_pipe_control.c
 delete mode 100644 src/mesa/drivers/dri/i965/genX_state_upload.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx4_blorp_exec.h
 delete mode 100644 src/mesa/drivers/dri/i965/gfx6_clip_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx6_constant_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx6_multisample_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx6_queryobj.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx6_sampler_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx6_sol.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx6_urb.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx7_l3_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx7_sol_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx7_urb.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx8_depth_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/gfx8_multisample_state.c
 delete mode 100644 src/mesa/drivers/dri/i965/hsw_queryobj.c
 delete mode 100644 src/mesa/drivers/dri/i965/hsw_sol.c
 delete mode 100644 src/mesa/drivers/dri/i965/libdrm_macros.h
 delete mode 100644 src/mesa/drivers/dri/i965/meson.build

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 8aff723..f8a4c57 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -825,7 +825,6 @@ fedora-release:
       -Wno-error=uninitialized
     CPP_ARGS: >
       -Wno-error=array-bounds
-    DRI_DRIVERS: "i965"
     DRI_LOADERS: >
       -D glx=dri
       -D gbm=enabled
@@ -1039,7 +1038,6 @@ debian-clang:
       -Wno-error=unused-variable
     DRI_LOADERS: >
      -D glvnd=true
-    DRI_DRIVERS: "auto"
    GALLIUM_DRIVERS: "iris,nouveau,kmsro,r300,r600,freedreno,swr,swrast,svga,v3d,vc4,virgl,etnaviv,panfrost,lima,zink,radeonsi,tegra,d3d12,crocus"
    VULKAN_DRIVERS: intel,amd,freedreno,broadcom,virtio-experimental
    CC: clang
@@ -1118,7 +1116,6 @@ debian-i386:
    CROSS: i386
    VULKAN_DRIVERS: intel,amd,swrast,virtio-experimental
    GALLIUM_DRIVERS: "iris,nouveau,r300,r600,radeonsi,swrast,virgl,zink,crocus"
-    DRI_DRIVERS: "i965"
    EXTRA_OPTION: >
      -D vulkan-layers=device-select,overlay
@@ -1155,7 +1152,6 @@ debian-mingw32-x86_64:
      -Wno-error=format
      -Wno-error=format-extra-args
    CPP_ARGS: $C_ARGS
-    DRI_DRIVERS: ""
    GALLIUM_DRIVERS: "swrast"
    EXTRA_OPTION: >
      -Dllvm=disabled
diff --git a/.gitlab-ci/meson/build.sh b/.gitlab-ci/meson/build.sh
index 56391e5..d052397 100755
--- a/.gitlab-ci/meson/build.sh
+++ b/.gitlab-ci/meson/build.sh
@@ -68,7 +68,6 @@ meson _build --native-file=native.file \
       -D cpp_args="$(echo -n $CPP_ARGS)" \
       -D libunwind=${UNWIND} \
       ${DRI_LOADERS} \
-      -D dri-drivers=${DRI_DRIVERS:-[]} \
       ${GALLIUM_ST} \
       -D gallium-drivers=${GALLIUM_DRIVERS:-[]} \
       -D vulkan-drivers=${VULKAN_DRIVERS:-[]} \
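The header deleted below is not ordinary C: it is an X-macro table, one CHIPSET(pci_id, family, family_string, marketing_name) row per supported device, with no include guard, meant to be expanded under whatever definition of CHIPSET the includer provides. A minimal sketch of the pattern, under the assumption of a made-up consumer (the struct and single inlined row here are illustrative, not Mesa code):

#include <stdint.h>

struct chip_name { uint32_t pci_id; const char *name; };

/* Each consumer defines CHIPSET to pick the columns it cares about;
 * unused arguments (family, fam_str) simply drop out of the expansion. */
#define CHIPSET(id, family, fam_str, name) { id, name },
static const struct chip_name chip_names[] = {
   /* In Mesa this row would come from #include "pci_ids/i965_pci_ids.h". */
   CHIPSET(0x29A2, i965, "BW", "Intel(R) 965G")
};
#undef CHIPSET

The same table feeds the loader's ID lists and the device-info switch further down in this patch, which is why removing the driver means deleting one header and retargeting a handful of #include lines.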
Graphics") -CHIPSET(0x0C22, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0C06, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0C16, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0C26, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0C0A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0C1A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0C2A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0C0B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0C1B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0C2B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0C0E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0C1E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0C2E, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0A02, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0A12, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0A22, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0A06, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0A16, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4400") -CHIPSET(0x0A26, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics 5000") -CHIPSET(0x0A0A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0A1A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0A2A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0A0B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0A1B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0A2B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0A0E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0A1E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4200") -CHIPSET(0x0A2E, hsw_gt3, "HSW GT3", "Intel(R) Iris(R) Graphics 5100") -CHIPSET(0x0D02, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0D12, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4600") -CHIPSET(0x0D22, hsw_gt3, "HSW GT3", "Intel(R) Iris(R) Pro Graphics 5200") -CHIPSET(0x0D06, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0D16, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0D26, hsw_gt3, "HSW GT3", "Intel(R) Iris(R) Pro Graphics P5200") -CHIPSET(0x0D0A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0D1A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0D2A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0D0B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0D1B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0D2B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") -CHIPSET(0x0D0E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics") -CHIPSET(0x0D1E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics") -CHIPSET(0x0D2E, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics") - -CHIPSET(0x0F31, byt, "BYT", "Intel(R) HD Graphics") -CHIPSET(0x0F32, byt, "BYT", "Intel(R) HD Graphics") -CHIPSET(0x0F33, byt, "BYT", "Intel(R) HD Graphics") -CHIPSET(0x0157, byt, "BYT", "Intel(R) HD Graphics") -CHIPSET(0x0155, byt, "BYT", "Intel(R) HD Graphics") - -CHIPSET(0x22B0, chv, "CHV", "Intel(R) HD Graphics") -CHIPSET(0x22B1, chv, "BSW", "Intel(R) HD Graphics XXX") /* Overridden in brw_get_renderer_string */ -CHIPSET(0x22B2, chv, "CHV", "Intel(R) HD Graphics") -CHIPSET(0x22B3, chv, "CHV", "Intel(R) HD Graphics") -#endif - -#ifndef PREFER_IRIS -CHIPSET(0x1602, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics") -CHIPSET(0x1606, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics") -CHIPSET(0x160A, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics") -CHIPSET(0x160B, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics") -CHIPSET(0x160D, bdw_gt1, "BDW GT1", "Intel(R) HD 
Graphics") -CHIPSET(0x160E, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics") -CHIPSET(0x1612, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics 5600") -CHIPSET(0x1616, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics 5500") -CHIPSET(0x161A, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics P5700") -CHIPSET(0x161B, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics") -CHIPSET(0x161D, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics") -CHIPSET(0x161E, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics 5300") -CHIPSET(0x1622, bdw_gt3, "BDW GT3", "Intel(R) Iris(R) Pro Graphics 6200") -CHIPSET(0x1626, bdw_gt3, "BDW GT3", "Intel(R) HD Graphics 6000") -CHIPSET(0x162A, bdw_gt3, "BDW GT3", "Intel(R) Iris(R) Pro Graphics P6300") -CHIPSET(0x162B, bdw_gt3, "BDW GT3", "Intel(R) Iris(R) Graphics 6100") -CHIPSET(0x162D, bdw_gt3, "BDW GT3", "Intel(R) HD Graphics") -CHIPSET(0x162E, bdw_gt3, "BDW GT3", "Intel(R) HD Graphics") - -CHIPSET(0x1902, skl_gt1, "SKL GT1", "Intel(R) HD Graphics 510") -CHIPSET(0x1906, skl_gt1, "SKL GT1", "Intel(R) HD Graphics 510") -CHIPSET(0x190A, skl_gt1, "SKL GT1", "Intel(R) HD Graphics") -CHIPSET(0x190B, skl_gt1, "SKL GT1", "Intel(R) HD Graphics 510") -CHIPSET(0x190E, skl_gt1, "SKL GT1", "Intel(R) HD Graphics") -CHIPSET(0x1912, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 530") -CHIPSET(0x1913, skl_gt2, "SKL GT2F", "Intel(R) HD Graphics") -CHIPSET(0x1915, skl_gt2, "SKL GT2F", "Intel(R) HD Graphics") -CHIPSET(0x1916, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 520") -CHIPSET(0x1917, skl_gt2, "SKL GT2F", "Intel(R) HD Graphics") -CHIPSET(0x191A, skl_gt2, "SKL GT2", "Intel(R) HD Graphics") -CHIPSET(0x191B, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 530") -CHIPSET(0x191D, skl_gt2, "SKL GT2", "Intel(R) HD Graphics P530") -CHIPSET(0x191E, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 515") -CHIPSET(0x1921, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 520") -CHIPSET(0x1923, skl_gt3, "SKL GT3", "Intel(R) HD Graphics 535") -CHIPSET(0x1926, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics 540") -CHIPSET(0x1927, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics 550") -CHIPSET(0x192A, skl_gt4, "SKL GT4", "Intel(R) HD Graphics") -CHIPSET(0x192B, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics 555") -CHIPSET(0x192D, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics P555") -CHIPSET(0x1932, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics 580") -CHIPSET(0x193A, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics P580") -CHIPSET(0x193B, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics 580") -CHIPSET(0x193D, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics P580") - -CHIPSET(0x0A84, bxt, "BXT 3", "Intel(R) HD Graphics") -CHIPSET(0x1A84, bxt, "BXT 3", "Intel(R) HD Graphics") -CHIPSET(0x1A85, bxt_2x6, "BXT 2", "Intel(R) HD Graphics") -CHIPSET(0x5A84, bxt, "APL 3", "Intel(R) HD Graphics 505") -CHIPSET(0x5A85, bxt_2x6, "APL 2", "Intel(R) HD Graphics 500") - -CHIPSET(0x3184, glk, "GLK 3", "Intel(R) UHD Graphics 605") -CHIPSET(0x3185, glk_2x6, "GLK 2", "Intel(R) UHD Graphics 600") - -CHIPSET(0x5902, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics 610") -CHIPSET(0x5906, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics 610") -CHIPSET(0x590A, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics") -CHIPSET(0x5908, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics") -CHIPSET(0x590B, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics 610") -CHIPSET(0x590E, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics") -CHIPSET(0x5913, kbl_gt1_5, "KBL GT1.5", "Intel(R) HD Graphics") -CHIPSET(0x5915, kbl_gt1_5, "KBL GT1.5", "Intel(R) HD Graphics") -CHIPSET(0x5917, kbl_gt2, "KBL GT2", "Intel(R) UHD Graphics 620") -CHIPSET(0x5912, kbl_gt2, 
"KBL GT2", "Intel(R) HD Graphics 630") -CHIPSET(0x5916, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 620") -CHIPSET(0x591A, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics P630") -CHIPSET(0x591B, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 630") -CHIPSET(0x591D, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics P630") -CHIPSET(0x591E, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 615") -CHIPSET(0x5921, kbl_gt2, "KBL GT2F", "Intel(R) HD Graphics 620") -CHIPSET(0x5923, kbl_gt3, "KBL GT3", "Intel(R) HD Graphics 635") -CHIPSET(0x5926, kbl_gt3, "KBL GT3", "Intel(R) Iris(R) Plus Graphics 640 (Kaby Lake GT3e)") -CHIPSET(0x5927, kbl_gt3, "KBL GT3", "Intel(R) Iris(R) Plus Graphics 650 (Kaby Lake GT3e)") -CHIPSET(0x593B, kbl_gt4, "KBL GT4", "Intel(R) HD Graphics") - -CHIPSET(0x591C, kbl_gt2, "AML-KBL", "Intel(R) UHD Graphics 615") -CHIPSET(0x87C0, kbl_gt2, "AML-KBL", "Intel(R) UHD Graphics 617") - -CHIPSET(0x87CA, cfl_gt2, "AML-CFL", "Intel(R) UHD Graphics") - -CHIPSET(0x3E90, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610") -CHIPSET(0x3E93, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610") -CHIPSET(0x3E99, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610") -CHIPSET(0x3E9C, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610") -CHIPSET(0x3E91, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630") -CHIPSET(0x3E92, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630") -CHIPSET(0x3E96, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics P630") -CHIPSET(0x3E98, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630") -CHIPSET(0x3E9A, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics P630") -CHIPSET(0x3E9B, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630") -CHIPSET(0x3E94, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics P630") -CHIPSET(0x3EA9, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 620") -CHIPSET(0x3EA5, cfl_gt3, "CFL GT3", "Intel(R) Iris(R) Plus Graphics 655") -CHIPSET(0x3EA6, cfl_gt3, "CFL GT3", "Intel(R) Iris(R) Plus Graphics 645") -CHIPSET(0x3EA7, cfl_gt3, "CFL GT3", "Intel(R) HD Graphics") -CHIPSET(0x3EA8, cfl_gt3, "CFL GT3", "Intel(R) Iris(R) Plus Graphics 655") - -CHIPSET(0x3EA1, cfl_gt1, "WHL GT1", "Intel(R) UHD Graphics 610") -CHIPSET(0x3EA4, cfl_gt1, "WHL GT1", "Intel(R) UHD Graphics") -CHIPSET(0x3EA0, cfl_gt2, "WHL GT2", "Intel(R) UHD Graphics 620") -CHIPSET(0x3EA3, cfl_gt2, "WHL GT2", "Intel(R) UHD Graphics") -CHIPSET(0x3EA2, cfl_gt3, "WHL GT3", "Intel(R) UHD Graphics") - -CHIPSET(0x9B21, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics") -CHIPSET(0x9BA0, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics") -CHIPSET(0x9BA2, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics") -CHIPSET(0x9BA4, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics") -CHIPSET(0x9BA5, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics 610") -CHIPSET(0x9BA8, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics 610") -CHIPSET(0x9BAA, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics") -CHIPSET(0x9BAB, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics") -CHIPSET(0x9BAC, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics") -CHIPSET(0x9B41, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics") -CHIPSET(0x9BC0, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics") -CHIPSET(0x9BC2, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics") -CHIPSET(0x9BC4, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics") -CHIPSET(0x9BC5, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics 630") -CHIPSET(0x9BC6, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics P630") -CHIPSET(0x9BC8, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics 630") -CHIPSET(0x9BCA, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics") -CHIPSET(0x9BCB, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics") -CHIPSET(0x9BCC, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics") 
-CHIPSET(0x9BE6, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics P630")
-CHIPSET(0x9BF6, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics P630")
-
-CHIPSET(0x8A50, icl_gt2, "ICL GT2", "Intel(R) HD Graphics")
-CHIPSET(0x8A51, icl_gt2, "ICL GT2", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A52, icl_gt2, "ICL GT2", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A53, icl_gt2, "ICL GT2", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A54, icl_gt1_5, "ICL GT1.5", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A56, icl_gt1, "ICL GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x8A57, icl_gt1_5, "ICL GT1.5", "Intel(R) HD Graphics")
-CHIPSET(0x8A58, icl_gt1, "ICL GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x8A59, icl_gt1_5, "ICL GT1.5", "Intel(R) HD Graphics")
-CHIPSET(0x8A5A, icl_gt1_5, "ICL GT1.5", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A5B, icl_gt1, "ICL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x8A5C, icl_gt1_5, "ICL GT1.5", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A5D, icl_gt1, "ICL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x8A71, icl_gt0_5, "ICL GT0.5", "Intel(R) HD Graphics")
-
-CHIPSET(0x4500, ehl_4x8, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4541, ehl_2x4, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4551, ehl_4x4, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4555, ehl_2x8, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4557, ehl_4x5, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4571, ehl_4x8, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E51, ehl_4x4, "JSL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E55, ehl_2x8, "JSL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E57, ehl_4x5, "JSL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E61, ehl_4x6, "JSL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E71, ehl_4x8, "JSL", "Intel(R) UHD Graphics")
-#endif

diff --git a/meson.build b/meson.build
index 03830e4..8b8ffc9 100644
--- a/meson.build
+++ b/meson.build
@@ -174,28 +174,10 @@ with_shared_glapi = with_shared_glapi and with_any_opengl
 system_has_kms_drm = ['openbsd', 'netbsd', 'freebsd', 'gnu/kfreebsd', 'dragonfly', 'linux', 'sunos'].contains(host_machine.system())
 
 dri_drivers = get_option('dri-drivers')
-if dri_drivers.contains('auto')
-  if system_has_kms_drm
-    # TODO: PPC, Sparc
-    if ['x86', 'x86_64'].contains(host_machine.cpu_family())
-      dri_drivers = ['i965']
-    elif ['arm', 'aarch64', 'mips', 'mips64'].contains(host_machine.cpu_family())
-      dri_drivers = []
-    else
-      error('Unknown architecture @0@. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.'.format(
-        host_machine.cpu_family()))
-    endif
-  elif ['darwin', 'windows', 'cygwin', 'haiku'].contains(host_machine.system())
-    # only swrast would make sense here, but gallium swrast is a much better default
-    dri_drivers = []
-  else
-    error('Unknown OS @0@. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.'.format(
-      host_machine.system()))
-  endif
+if dri_drivers.length() != 0
+  error('Mesa\'s main branch no longer has any "classic" drivers, use the "amber" branch instead.')
 endif
-with_dri_i965 = dri_drivers.contains('i965')
-
 with_dri = dri_drivers.length() != 0
 
 gallium_drivers = get_option('gallium-drivers')
@@ -205,7 +187,7 @@ if gallium_drivers.contains('auto')
     if ['x86', 'x86_64'].contains(host_machine.cpu_family())
       gallium_drivers = [
         'r300', 'r600', 'radeonsi', 'nouveau', 'virgl', 'svga', 'swrast',
-        'iris', 'crocus'
+        'iris', 'crocus', 'i915'
       ]
     elif ['arm', 'aarch64'].contains(host_machine.cpu_family())
       gallium_drivers = [
@@ -293,7 +275,7 @@ with_broadcom_vk = _vulkan_drivers.contains('broadcom')
 with_any_vk = _vulkan_drivers.length() != 0
 with_any_broadcom = with_gallium_vc4 or with_gallium_v3d or with_broadcom_vk
-with_any_intel = with_dri_i965 or with_intel_vk or with_gallium_iris or with_gallium_crocus
+with_any_intel = with_intel_vk or with_gallium_iris or with_gallium_crocus
 
 if with_swrast_vk and not with_gallium_softpipe
   error('swrast vulkan requires gallium swrast')
@@ -1493,8 +1475,6 @@ if cc.has_function('dl_iterate_phdr')
   pre_args += '-DHAVE_DL_ITERATE_PHDR'
 elif with_intel_vk
   error('Intel "Anvil" Vulkan driver requires the dl_iterate_phdr function')
-elif with_dri_i965 and with_shader_cache
-  error('Intel i965 GL driver requires dl_iterate_phdr when built with shader caching.')
 endif
 
 # Determine whether or not the rt library is needed for time functions
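The meson.build hunks above are the heart of the change: instead of expanding dri-drivers='auto' into ['i965'] on x86, configuration now stops with the quoted error for any non-empty dri-drivers list, pointing users at the amber branch where the classic drivers are maintained. The Gallium auto list gains 'i915' next to 'iris' and 'crocus', so pre-Gfx4 hardware keeps a default driver once the classic tree is gone, and with_any_intel no longer has a classic GL term.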
diff --git a/meson_options.txt b/meson_options.txt
index ac49808..5278e17 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -54,9 +54,7 @@ option(
 option(
   'dri-drivers',
   type : 'array',
-  value : ['auto'],
-  choices : ['auto', 'i965'],
-  description : 'List of dri drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
+  description : 'DEPRECATED: List of dri drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
 )
 option(
   'dri-drivers-path',
@@ -455,18 +453,6 @@ option(
   value : true,
   description : 'Enable direct rendering in GLX and EGL for DRI',
 )
-option(
-  'prefer-iris',
-  type : 'boolean',
-  value : true,
-  description : 'Prefer new Intel iris driver over older i965 driver'
-)
-option(
-  'prefer-crocus',
-  type : 'boolean',
-  value : false,
-  description : 'Prefer new crocus driver over older i965 driver for gen4-7'
-)
 option(
   'egl-lib-suffix',
   type : 'string',
   value : '',
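intel_device_info picks a per-device info struct by expanding the same PCI-ID tables into switch cases, which is why the hunks below only need to retarget one #include from i965_pci_ids.h to crocus_pci_ids.h. Roughly how that expansion works, sketched with a single hard-coded row and a stand-in struct rather than the real intel_device_info definitions:

#include <stdbool.h>

struct device_info_sketch { const char *name; };

/* Stand-in for the per-family constant the real code selects. */
static const struct device_info_sketch intel_device_info_hsw_gt2 =
   { "Haswell GT2" };

static bool
get_device_info(int pci_id, struct device_info_sketch *devinfo)
{
   switch (pci_id) {
#define CHIPSET(id, family, fam_str, name) \
   case id: *devinfo = intel_device_info_##family; break;
   /* Mesa expands crocus_pci_ids.h and iris_pci_ids.h here; one row inline: */
   CHIPSET(0x0412, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4600")
#undef CHIPSET
   default:
      return false;
   }
   return true;
}

Because the crocus table carries the same Gfx4-7.5 rows the i965 table did, device-info lookup keeps working for that hardware with no behavioral change.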
diff --git a/src/intel/dev/intel_device_info.c b/src/intel/dev/intel_device_info.c
index fc2d0d0..4fb99fe 100644
--- a/src/intel/dev/intel_device_info.c
+++ b/src/intel/dev/intel_device_info.c
@@ -1247,7 +1247,7 @@ intel_get_device_info_from_pci_id(int pci_id,
 #undef CHIPSET
 #define CHIPSET(id, family, fam_str, name) \
    case id: *devinfo = intel_device_info_##family; break;
-#include "pci_ids/i965_pci_ids.h"
+#include "pci_ids/crocus_pci_ids.h"
 #include "pci_ids/iris_pci_ids.h"
 #undef CHIPSET
 
@@ -1269,7 +1269,7 @@ intel_get_device_info_from_pci_id(int pci_id,
               sizeof(devinfo->name)); \
       strncpy(devinfo->name, _name " (" _fam_str ")", sizeof(devinfo->name)); \
       break;
-#include "pci_ids/i965_pci_ids.h"
+#include "pci_ids/crocus_pci_ids.h"
 #include "pci_ids/iris_pci_ids.h"
    default:
       strncpy(devinfo->name, "Intel Unknown", sizeof(devinfo->name));

diff --git a/src/intel/dev/intel_device_info_test.c b/src/intel/dev/intel_device_info_test.c
index 236310d..c65f7a7 100644
--- a/src/intel/dev/intel_device_info_test.c
+++ b/src/intel/dev/intel_device_info_test.c
@@ -14,13 +14,8 @@ main(int argc, char *argv[])
    } chipsets[] = {
 #undef CHIPSET
 #define CHIPSET(id, family, family_str, str_name) { .pci_id = id, .name = str_name, },
-#include "pci_ids/crocus_pci_ids.h"
-#include "pci_ids/i965_pci_ids.h"
 #include "pci_ids/iris_pci_ids.h"
-#undef CHIPSET
-#define CHIPSET(id, fam_str, str_name) { .pci_id = id, .name = str_name, },
-#include "pci_ids/i915_pci_ids.h"
-#undef CHIPSET
+#include "pci_ids/crocus_pci_ids.h"
    };
 
    for (uint32_t i = 0; i < ARRAY_SIZE(chipsets); i++) {

diff --git a/src/loader/meson.build b/src/loader/meson.build
index 0a529d8..6334cb9 100644
--- a/src/loader/meson.build
+++ b/src/loader/meson.build
@@ -40,15 +40,6 @@ loader_c_args = [
   '-DUSE_DRICONF',
   '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path),
 ]
-
-if get_option('prefer-iris')
-  loader_c_args += ['-DPREFER_IRIS']
-endif
-
-if get_option('prefer-crocus')
-  loader_c_args += ['-DPREFER_CROCUS']
-endif
-
 libloader = static_library(
   'loader',
   ['loader_dri_helper.c', 'loader.c'],

diff --git a/src/loader/pci_id_driver_map.h b/src/loader/pci_id_driver_map.h
index 544f192..5ffcf05 100644
--- a/src/loader/pci_id_driver_map.h
+++ b/src/loader/pci_id_driver_map.h
@@ -8,12 +8,6 @@
 # error "Only include from loader.c"
 #endif
 
-static const int i965_chip_ids[] = {
-#define CHIPSET(chip, family, family_str, name) chip,
-#include "pci_ids/i965_pci_ids.h"
-#undef CHIPSET
-};
-
 static const int crocus_chip_ids[] = {
 #define CHIPSET(chip, family, family_str, name) chip,
 #include "pci_ids/crocus_pci_ids.h"
@@ -53,7 +47,6 @@ static const struct {
    int num_chips_ids;
    bool (*predicate)(int fd);
 } driver_map[] = {
-   { 0x8086, "i965", i965_chip_ids, ARRAY_SIZE(i965_chip_ids) },
    { 0x8086, "crocus", crocus_chip_ids, ARRAY_SIZE(crocus_chip_ids) },
    { 0x8086, "iris", NULL, -1, is_kernel_i915 },
    { 0x1002, "r300", r300_chip_ids, ARRAY_SIZE(r300_chip_ids) },
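With the i965 row gone from driver_map above, the loader resolves Gfx4-7.5 PCI IDs through crocus_chip_ids, while the iris row carries no ID list (num_chips_ids == -1) and instead claims any remaining Intel device whose kernel driver passes the is_kernel_i915 check. A simplified sketch of the lookup this table feeds, with stand-in data; the real loop lives in loader.c:

#include <stdbool.h>
#include <stddef.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* Stand-in for the real kernel-driver probe. */
static bool is_kernel_i915(int fd) { (void)fd; return true; }

static const int crocus_chip_ids[] = { 0x0412 /* ...from crocus_pci_ids.h */ };

static const struct {
   int vendor_id;
   const char *driver;
   const int *chip_ids;
   int num_chips_ids;
   bool (*predicate)(int fd);
} driver_map[] = {
   { 0x8086, "crocus", crocus_chip_ids, ARRAY_SIZE(crocus_chip_ids) },
   { 0x8086, "iris", NULL, -1, is_kernel_i915 },
};

static const char *
find_driver(int vendor_id, int chip_id, int fd)
{
   for (size_t i = 0; i < ARRAY_SIZE(driver_map); i++) {
      if (driver_map[i].vendor_id != vendor_id)
         continue;
      if (driver_map[i].predicate && !driver_map[i].predicate(fd))
         continue;
      if (driver_map[i].num_chips_ids == -1)
         return driver_map[i].driver; /* catch-all row: iris takes the rest */
      for (int j = 0; j < driver_map[i].num_chips_ids; j++)
         if (driver_map[i].chip_ids[j] == chip_id)
            return driver_map[i].driver;
   }
   return NULL;
}

Ordering matters: crocus is listed before the iris catch-all, so older parts match their explicit ID list first and never fall through to iris.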
diff --git a/src/mesa/drivers/dri/i965/brw_batch.c b/src/mesa/drivers/dri/i965/brw_batch.c
deleted file mode 100644
index 9955018..0000000
--- a/src/mesa/drivers/dri/i965/brw_batch.c
+++ /dev/null
@@ -1,1332 +0,0 @@
-/*
- * Copyright 2006 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-#include "brw_bufmgr.h"
-#include "brw_buffers.h"
-#include "brw_fbo.h"
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "common/intel_decoder.h"
-#include "common/intel_gem.h"
-
-#include "util/hash_table.h"
-
-#include <xf86drm.h>
-#include "drm-uapi/i915_drm.h"
-
-#define FILE_DEBUG_FLAG DEBUG_BUFMGR
-
-/**
- * Target sizes of the batch and state buffers. We create the initial
- * buffers at these sizes, and flush when they're nearly full. If we
- * underestimate how close we are to the end, and suddenly need more space
- * in the middle of a draw, we can grow the buffers, and finish the draw.
- * At that point, we'll be over our target size, so the next operation
- * should flush. Each time we flush the batch, we recreate both buffers
- * at the original target size, so it doesn't grow without bound.
- */
-#define BATCH_SZ (20 * 1024)
-#define STATE_SZ (16 * 1024)
-
-static void
-brw_batch_reset(struct brw_context *brw);
-static void
-brw_new_batch(struct brw_context *brw);
-
-static unsigned
-num_fences(struct brw_batch *batch)
-{
-   return util_dynarray_num_elements(&batch->exec_fences,
-                                     struct drm_i915_gem_exec_fence);
-}
-
-
-static void
-dump_validation_list(struct brw_batch *batch)
-{
-   fprintf(stderr, "Validation list (length %d):\n", batch->exec_count);
-
-   for (int i = 0; i < batch->exec_count; i++) {
-      uint64_t flags = batch->validation_list[i].flags;
-      assert(batch->validation_list[i].handle ==
-             batch->exec_bos[i]->gem_handle);
-      fprintf(stderr, "[%2d]: %2d %-14s %p %s%-7s @ 0x%"PRIx64"%s (%"PRIu64"B)\n",
-              i,
-              batch->validation_list[i].handle,
-              batch->exec_bos[i]->name,
-              batch->exec_bos[i],
-              (flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) ? "(48b" : "(32b",
-              (flags & EXEC_OBJECT_WRITE) ? " write)" : ")",
-              (uint64_t)batch->validation_list[i].offset,
-              (flags & EXEC_OBJECT_PINNED) ?
" (pinned)" : "", - batch->exec_bos[i]->size); - } -} - -static struct intel_batch_decode_bo -decode_get_bo(void *v_brw, bool ppgtt, uint64_t address) -{ - struct brw_context *brw = v_brw; - struct brw_batch *batch = &brw->batch; - - for (int i = 0; i < batch->exec_count; i++) { - struct brw_bo *bo = batch->exec_bos[i]; - /* The decoder zeroes out the top 16 bits, so we need to as well */ - uint64_t bo_address = bo->gtt_offset & (~0ull >> 16); - - if (address >= bo_address && address < bo_address + bo->size) { - return (struct intel_batch_decode_bo) { - .addr = bo_address, - .size = bo->size, - .map = brw_bo_map(brw, bo, MAP_READ), - }; - } - } - - return (struct intel_batch_decode_bo) { }; -} - -static unsigned -decode_get_state_size(void *v_brw, uint64_t address, uint64_t base_address) -{ - struct brw_context *brw = v_brw; - struct brw_batch *batch = &brw->batch; - unsigned size = (uintptr_t) - _mesa_hash_table_u64_search(batch->state_batch_sizes, - address - base_address); - return size; -} - -static void -init_reloc_list(struct brw_reloc_list *rlist, int count) -{ - rlist->reloc_count = 0; - rlist->reloc_array_size = count; - rlist->relocs = malloc(rlist->reloc_array_size * - sizeof(struct drm_i915_gem_relocation_entry)); -} - -void -brw_batch_init(struct brw_context *brw) -{ - struct brw_screen *screen = brw->screen; - struct brw_batch *batch = &brw->batch; - const struct intel_device_info *devinfo = &screen->devinfo; - - if (INTEL_DEBUG(DEBUG_BATCH)) { - /* The shadow doesn't get relocs written so state decode fails. */ - batch->use_shadow_copy = false; - } else - batch->use_shadow_copy = !devinfo->has_llc; - - init_reloc_list(&batch->batch_relocs, 250); - init_reloc_list(&batch->state_relocs, 250); - - batch->batch.map = NULL; - batch->state.map = NULL; - batch->exec_count = 0; - batch->exec_array_size = 100; - batch->exec_bos = - malloc(batch->exec_array_size * sizeof(batch->exec_bos[0])); - batch->validation_list = - malloc(batch->exec_array_size * sizeof(batch->validation_list[0])); - batch->contains_fence_signal = false; - - if (INTEL_DEBUG(DEBUG_BATCH)) { - batch->state_batch_sizes = - _mesa_hash_table_u64_create(NULL); - - const unsigned decode_flags = - INTEL_BATCH_DECODE_FULL | - (INTEL_DEBUG(DEBUG_COLOR) ? 
INTEL_BATCH_DECODE_IN_COLOR : 0) | - INTEL_BATCH_DECODE_OFFSETS | - INTEL_BATCH_DECODE_FLOATS; - - intel_batch_decode_ctx_init(&batch->decoder, devinfo, stderr, - decode_flags, NULL, decode_get_bo, - decode_get_state_size, brw); - batch->decoder.max_vbo_decoded_lines = 100; - } - - batch->use_batch_first = - screen->kernel_features & KERNEL_ALLOWS_EXEC_BATCH_FIRST; - - /* PIPE_CONTROL needs a w/a but only on gfx6 */ - batch->valid_reloc_flags = EXEC_OBJECT_WRITE; - if (devinfo->ver == 6) - batch->valid_reloc_flags |= EXEC_OBJECT_NEEDS_GTT; - - brw_batch_reset(brw); -} - -#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x)) - -static unsigned -add_exec_bo(struct brw_batch *batch, struct brw_bo *bo) -{ - assert(bo->bufmgr == batch->batch.bo->bufmgr); - - unsigned index = READ_ONCE(bo->index); - - if (index < batch->exec_count && batch->exec_bos[index] == bo) - return index; - - /* May have been shared between multiple active batches */ - for (index = 0; index < batch->exec_count; index++) { - if (batch->exec_bos[index] == bo) - return index; - } - - brw_bo_reference(bo); - - if (batch->exec_count == batch->exec_array_size) { - batch->exec_array_size *= 2; - batch->exec_bos = - realloc(batch->exec_bos, - batch->exec_array_size * sizeof(batch->exec_bos[0])); - batch->validation_list = - realloc(batch->validation_list, - batch->exec_array_size * sizeof(batch->validation_list[0])); - } - - batch->validation_list[batch->exec_count] = - (struct drm_i915_gem_exec_object2) { - .handle = bo->gem_handle, - .offset = bo->gtt_offset, - .flags = bo->kflags, - }; - - bo->index = batch->exec_count; - batch->exec_bos[batch->exec_count] = bo; - batch->aperture_space += bo->size; - - return batch->exec_count++; -} - -static void -recreate_growing_buffer(struct brw_context *brw, - struct brw_growing_bo *grow, - const char *name, unsigned size, - enum brw_memory_zone memzone) -{ - struct brw_screen *screen = brw->screen; - struct brw_batch *batch = &brw->batch; - struct brw_bufmgr *bufmgr = screen->bufmgr; - - /* We can't grow buffers when using softpin, so just overallocate them. */ - if (brw_using_softpin(bufmgr)) - size *= 2; - - grow->bo = brw_bo_alloc(bufmgr, name, size, memzone); - grow->bo->kflags |= can_do_exec_capture(screen) ? EXEC_OBJECT_CAPTURE : 0; - grow->partial_bo = NULL; - grow->partial_bo_map = NULL; - grow->partial_bytes = 0; - grow->memzone = memzone; - - if (batch->use_shadow_copy) - grow->map = realloc(grow->map, grow->bo->size); - else - grow->map = brw_bo_map(brw, grow->bo, MAP_READ | MAP_WRITE); -} - -static void -brw_batch_reset(struct brw_context *brw) -{ - struct brw_batch *batch = &brw->batch; - - if (batch->last_bo != NULL) { - brw_bo_unreference(batch->last_bo); - batch->last_bo = NULL; - } - batch->last_bo = batch->batch.bo; - - recreate_growing_buffer(brw, &batch->batch, "batchbuffer", BATCH_SZ, - BRW_MEMZONE_OTHER); - batch->map_next = batch->batch.map; - - recreate_growing_buffer(brw, &batch->state, "statebuffer", STATE_SZ, - BRW_MEMZONE_DYNAMIC); - - /* Avoid making 0 a valid state offset - otherwise the decoder will try - * and decode data when we use offset 0 as a null pointer. - */ - batch->state_used = 1; - - add_exec_bo(batch, batch->batch.bo); - assert(batch->batch.bo->index == 0); - - batch->needs_sol_reset = false; - batch->state_base_address_emitted = false; - - if (batch->state_batch_sizes) - _mesa_hash_table_u64_clear(batch->state_batch_sizes); - - /* Always add workaround_bo which contains a driver identifier to be - * recorded in error states. 
- */ - struct brw_bo *identifier_bo = brw->workaround_bo; - if (identifier_bo) - add_exec_bo(batch, identifier_bo); - - if (batch->contains_fence_signal) - batch->contains_fence_signal = false; -} - -static void -brw_batch_reset_and_clear_render_cache(struct brw_context *brw) -{ - brw_batch_reset(brw); - brw_cache_sets_clear(brw); -} - -void -brw_batch_save_state(struct brw_context *brw) -{ - brw->batch.saved.map_next = brw->batch.map_next; - brw->batch.saved.batch_reloc_count = brw->batch.batch_relocs.reloc_count; - brw->batch.saved.state_reloc_count = brw->batch.state_relocs.reloc_count; - brw->batch.saved.exec_count = brw->batch.exec_count; -} - -bool -brw_batch_saved_state_is_empty(struct brw_context *brw) -{ - struct brw_batch *batch = &brw->batch; - return (batch->saved.map_next == batch->batch.map); -} - -void -brw_batch_reset_to_saved(struct brw_context *brw) -{ - for (int i = brw->batch.saved.exec_count; - i < brw->batch.exec_count; i++) { - brw_bo_unreference(brw->batch.exec_bos[i]); - } - brw->batch.batch_relocs.reloc_count = brw->batch.saved.batch_reloc_count; - brw->batch.state_relocs.reloc_count = brw->batch.saved.state_reloc_count; - brw->batch.exec_count = brw->batch.saved.exec_count; - - brw->batch.map_next = brw->batch.saved.map_next; - if (USED_BATCH(brw->batch) == 0) - brw_new_batch(brw); -} - -void -brw_batch_free(struct brw_batch *batch) -{ - if (batch->use_shadow_copy) { - free(batch->batch.map); - free(batch->state.map); - } - - for (int i = 0; i < batch->exec_count; i++) { - brw_bo_unreference(batch->exec_bos[i]); - } - free(batch->batch_relocs.relocs); - free(batch->state_relocs.relocs); - free(batch->exec_bos); - free(batch->validation_list); - - brw_bo_unreference(batch->last_bo); - brw_bo_unreference(batch->batch.bo); - brw_bo_unreference(batch->state.bo); - if (batch->state_batch_sizes) { - _mesa_hash_table_u64_destroy(batch->state_batch_sizes); - intel_batch_decode_ctx_finish(&batch->decoder); - } -} - -/** - * Finish copying the old batch/state buffer's contents to the new one - * after we tried to "grow" the buffer in an earlier operation. - */ -static void -finish_growing_bos(struct brw_growing_bo *grow) -{ - struct brw_bo *old_bo = grow->partial_bo; - if (!old_bo) - return; - - memcpy(grow->map, grow->partial_bo_map, grow->partial_bytes); - - grow->partial_bo = NULL; - grow->partial_bo_map = NULL; - grow->partial_bytes = 0; - - brw_bo_unreference(old_bo); -} - -static void -replace_bo_in_reloc_list(struct brw_reloc_list *rlist, - uint32_t old_handle, uint32_t new_handle) -{ - for (int i = 0; i < rlist->reloc_count; i++) { - if (rlist->relocs[i].target_handle == old_handle) - rlist->relocs[i].target_handle = new_handle; - } -} - -/** - * Grow either the batch or state buffer to a new larger size. - * - * We can't actually grow buffers, so we allocate a new one, copy over - * the existing contents, and update our lists to refer to the new one. - * - * Note that this is only temporary - each new batch recreates the buffers - * at their original target size (BATCH_SZ or STATE_SZ). - */ -static void -grow_buffer(struct brw_context *brw, - struct brw_growing_bo *grow, - unsigned existing_bytes, - unsigned new_size) -{ - struct brw_batch *batch = &brw->batch; - struct brw_bufmgr *bufmgr = brw->bufmgr; - struct brw_bo *bo = grow->bo; - - /* We can't grow buffers that are softpinned, as the growing mechanism - * involves putting a larger buffer at the same gtt_offset...and we've - * only allocated the smaller amount of VMA. 
Without relocations, this - * simply won't work. This should never happen, however. - */ - assert(!(bo->kflags & EXEC_OBJECT_PINNED)); - - perf_debug("Growing %s - ran out of space\n", bo->name); - - if (grow->partial_bo) { - /* We've already grown once, and now we need to do it again. - * Finish our last grow operation so we can start a new one. - * This should basically never happen. - */ - perf_debug("Had to grow multiple times"); - finish_growing_bos(grow); - } - - struct brw_bo *new_bo = - brw_bo_alloc(bufmgr, bo->name, new_size, grow->memzone); - - /* Copy existing data to the new larger buffer */ - grow->partial_bo_map = grow->map; - - if (batch->use_shadow_copy) { - /* We can't safely use realloc, as it may move the existing buffer, - * breaking existing pointers the caller may still be using. Just - * malloc a new copy and memcpy it like the normal BO path. - * - * Use bo->size rather than new_size because the bufmgr may have - * rounded up the size, and we want the shadow size to match. - */ - grow->map = malloc(new_bo->size); - } else { - grow->map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE); - } - - /* Try to put the new BO at the same GTT offset as the old BO (which - * we're throwing away, so it doesn't need to be there). - * - * This guarantees that our relocations continue to work: values we've - * already written into the buffer, values we're going to write into the - * buffer, and the validation/relocation lists all will match. - * - * Also preserve kflags for EXEC_OBJECT_CAPTURE. - */ - new_bo->gtt_offset = bo->gtt_offset; - new_bo->index = bo->index; - new_bo->kflags = bo->kflags; - - /* Batch/state buffers are per-context, and if we've run out of space, - * we must have actually used them before, so...they will be in the list. - */ - assert(bo->index < batch->exec_count); - assert(batch->exec_bos[bo->index] == bo); - - /* Update the validation list to use the new BO. */ - batch->validation_list[bo->index].handle = new_bo->gem_handle; - - if (!batch->use_batch_first) { - /* We're not using I915_EXEC_HANDLE_LUT, which means we need to go - * update the relocation list entries to point at the new BO as well. - * (With newer kernels, the "handle" is an offset into the validation - * list, which remains unchanged, so we can skip this.) - */ - replace_bo_in_reloc_list(&batch->batch_relocs, - bo->gem_handle, new_bo->gem_handle); - replace_bo_in_reloc_list(&batch->state_relocs, - bo->gem_handle, new_bo->gem_handle); - } - - /* Exchange the two BOs...without breaking pointers to the old BO. - * - * Consider this scenario: - * - * 1. Somebody calls brw_state_batch() to get a region of memory, and - * and then creates a brw_address pointing to brw->batch.state.bo. - * 2. They then call brw_state_batch() a second time, which happens to - * grow and replace the state buffer. They then try to emit a - * relocation to their first section of memory. - * - * If we replace the brw->batch.state.bo pointer at step 2, we would - * break the address created in step 1. They'd have a pointer to the - * old destroyed BO. Emitting a relocation would add this dead BO to - * the validation list...causing /both/ statebuffers to be in the list, - * and all kinds of disasters. - * - * This is not a contrived case - BLORP vertex data upload hits this. - * - * There are worse scenarios too. Fences for GL sync objects reference - * brw->batch.batch.bo. If we replaced the batch pointer when growing, - * we'd need to chase down every fence and update it to point to the - * new BO. 
Otherwise, it would refer to a "batch" that never actually - * gets submitted, and would fail to trigger. - * - * To work around both of these issues, we transmutate the buffers in - * place, making the existing struct brw_bo represent the new buffer, - * and "new_bo" represent the old BO. This is highly unusual, but it - * seems like a necessary evil. - * - * We also defer the memcpy of the existing batch's contents. Callers - * may make multiple brw_state_batch calls, and retain pointers to the - * old BO's map. We'll perform the memcpy in finish_growing_bo() when - * we finally submit the batch, at which point we've finished uploading - * state, and nobody should have any old references anymore. - * - * To do that, we keep a reference to the old BO in grow->partial_bo, - * and store the number of bytes to copy in grow->partial_bytes. We - * can monkey with the refcounts directly without atomics because these - * are per-context BOs and they can only be touched by this thread. - */ - assert(new_bo->refcount == 1); - new_bo->refcount = bo->refcount; - bo->refcount = 1; - - assert(list_is_empty(&bo->exports)); - assert(list_is_empty(&new_bo->exports)); - - struct brw_bo tmp; - memcpy(&tmp, bo, sizeof(struct brw_bo)); - memcpy(bo, new_bo, sizeof(struct brw_bo)); - memcpy(new_bo, &tmp, sizeof(struct brw_bo)); - - list_inithead(&bo->exports); - list_inithead(&new_bo->exports); - - grow->partial_bo = new_bo; /* the one reference of the OLD bo */ - grow->partial_bytes = existing_bytes; -} - -void -brw_batch_require_space(struct brw_context *brw, GLuint sz) -{ - struct brw_batch *batch = &brw->batch; - - const unsigned batch_used = USED_BATCH(*batch) * 4; - if (batch_used + sz >= BATCH_SZ && !batch->no_wrap) { - brw_batch_flush(brw); - } else if (batch_used + sz >= batch->batch.bo->size) { - const unsigned new_size = - MIN2(batch->batch.bo->size + batch->batch.bo->size / 2, - MAX_BATCH_SIZE); - grow_buffer(brw, &batch->batch, batch_used, new_size); - batch->map_next = (void *) batch->batch.map + batch_used; - assert(batch_used + sz < batch->batch.bo->size); - } -} - -/** - * Called when starting a new batch buffer. - */ -static void -brw_new_batch(struct brw_context *brw) -{ - /* Unreference any BOs held by the previous batch, and reset counts. */ - for (int i = 0; i < brw->batch.exec_count; i++) { - brw_bo_unreference(brw->batch.exec_bos[i]); - brw->batch.exec_bos[i] = NULL; - } - brw->batch.batch_relocs.reloc_count = 0; - brw->batch.state_relocs.reloc_count = 0; - brw->batch.exec_count = 0; - brw->batch.aperture_space = 0; - - brw_bo_unreference(brw->batch.state.bo); - - /* Create a new batchbuffer and reset the associated state: */ - brw_batch_reset_and_clear_render_cache(brw); - - /* If the kernel supports hardware contexts, then most hardware state is - * preserved between batches; we only need to re-emit state that is required - * to be in every batch. Otherwise we need to re-emit all the state that - * would otherwise be stored in the context (which for all intents and - * purposes means everything). - */ - if (brw->hw_ctx == 0) { - brw->ctx.NewDriverState |= BRW_NEW_CONTEXT; - brw_upload_invariant_state(brw); - } - - brw->ctx.NewDriverState |= BRW_NEW_BATCH; - - brw->ib.index_size = -1; - - /* We need to periodically reap the shader time results, because rollover - * happens every few seconds. We also want to see results every once in a - * while, because many programs won't cleanly destroy our context, so the - * end-of-run printout may not happen. 
- */ - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) - brw_collect_and_report_shader_time(brw); - - brw_batch_maybe_noop(brw); -} - -/** - * Called from brw_batch_flush before emitting MI_BATCHBUFFER_END and - * sending it off. - * - * This function can emit state (say, to preserve registers that aren't saved - * between batches). - */ -static void -brw_finish_batch(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - brw->batch.no_wrap = true; - - /* Capture the closing pipeline statistics register values necessary to - * support query objects (in the non-hardware context world). - */ - brw_emit_query_end(brw); - - /* Work around L3 state leaks into contexts set MI_RESTORE_INHIBIT which - * assume that the L3 cache is configured according to the hardware - * defaults. On Kernel 4.16+, we no longer need to do this. - */ - if (devinfo->ver >= 7 && - !(brw->screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION)) - gfx7_restore_default_l3_config(brw); - - if (devinfo->platform == INTEL_PLATFORM_HSW) { - /* From the Haswell PRM, Volume 2b, Command Reference: Instructions, - * 3DSTATE_CC_STATE_POINTERS > "Note": - * - * "SW must program 3DSTATE_CC_STATE_POINTERS command at the end of every - * 3D batch buffer followed by a PIPE_CONTROL with RC flush and CS stall." - * - * From the example in the docs, it seems to expect a regular pipe control - * flush here as well. We may have done it already, but meh. - * - * See also WaAvoidRCZCounterRollover. - */ - brw_emit_mi_flush(brw); - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2)); - OUT_BATCH(brw->cc.state_offset | 1); - ADVANCE_BATCH(); - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_CS_STALL); - } - - /* Do not restore push constant packets during context restore. */ - if (devinfo->ver >= 7) - gfx7_emit_isp_disable(brw); - - /* Emit MI_BATCH_BUFFER_END to finish our batch. Note that execbuf2 - * requires our batch size to be QWord aligned, so we pad it out if - * necessary by emitting an extra MI_NOOP after the end. - */ - brw_batch_require_space(brw, 8); - *brw->batch.map_next++ = MI_BATCH_BUFFER_END; - if (USED_BATCH(brw->batch) & 1) { - *brw->batch.map_next++ = MI_NOOP; - } - - brw->batch.no_wrap = false; -} - -static void -throttle(struct brw_context *brw) -{ - /* Wait for the swapbuffers before the one we just emitted, so we - * don't get too many swaps outstanding for apps that are GPU-heavy - * but not CPU-heavy. - * - * We're using intelDRI2Flush (called from the loader before - * swapbuffer) and glFlush (for front buffer rendering) as the - * indicator that a frame is done and then throttle when we get - * here as we prepare to render the next frame. At this point for - * round trips for swap/copy and getting new buffers are done and - * we'll spend less time waiting on the GPU. - * - * Unfortunately, we don't have a handle to the batch containing - * the swap, and getting our hands on that doesn't seem worth it, - * so we just use the first batch we emitted after the last swap. 
- */
-   if (brw->need_swap_throttle && brw->throttle_batch[0]) {
-      if (brw->throttle_batch[1]) {
-         if (!brw->disable_throttling) {
-            brw_bo_wait_rendering(brw->throttle_batch[1]);
-         }
-         brw_bo_unreference(brw->throttle_batch[1]);
-      }
-      brw->throttle_batch[1] = brw->throttle_batch[0];
-      brw->throttle_batch[0] = NULL;
-      brw->need_swap_throttle = false;
-      /* Throttling here is more precise than the throttle ioctl, so skip it */
-      brw->need_flush_throttle = false;
-   }
-
-   if (brw->need_flush_throttle) {
-      drmCommandNone(brw->screen->fd, DRM_I915_GEM_THROTTLE);
-      brw->need_flush_throttle = false;
-   }
-}
-
-static int
-execbuffer(int fd,
-           struct brw_batch *batch,
-           uint32_t ctx_id,
-           int used,
-           int in_fence,
-           int *out_fence,
-           int flags)
-{
-   struct drm_i915_gem_execbuffer2 execbuf = {
-      .buffers_ptr = (uintptr_t) batch->validation_list,
-      .buffer_count = batch->exec_count,
-      .batch_start_offset = 0,
-      .batch_len = used,
-      .flags = flags,
-      .rsvd1 = ctx_id, /* rsvd1 is actually the context ID */
-   };
-
-   unsigned long cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2;
-
-   if (in_fence != -1) {
-      execbuf.rsvd2 = in_fence;
-      execbuf.flags |= I915_EXEC_FENCE_IN;
-   }
-
-   if (out_fence != NULL) {
-      cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2_WR;
-      *out_fence = -1;
-      execbuf.flags |= I915_EXEC_FENCE_OUT;
-   }
-
-   if (num_fences(batch)) {
-      execbuf.flags |= I915_EXEC_FENCE_ARRAY;
-      execbuf.num_cliprects = num_fences(batch);
-      execbuf.cliprects_ptr =
-         (uintptr_t)util_dynarray_begin(&batch->exec_fences);
-   }
-
-   int ret = drmIoctl(fd, cmd, &execbuf);
-   if (ret != 0)
-      ret = -errno;
-
-   for (int i = 0; i < batch->exec_count; i++) {
-      struct brw_bo *bo = batch->exec_bos[i];
-
-      bo->idle = false;
-      bo->index = -1;
-
-      /* Update brw_bo::gtt_offset */
-      if (batch->validation_list[i].offset != bo->gtt_offset) {
-         DBG("BO %d migrated: 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
-             bo->gem_handle, bo->gtt_offset,
-             (uint64_t)batch->validation_list[i].offset);
-         assert(!(bo->kflags & EXEC_OBJECT_PINNED));
-         bo->gtt_offset = batch->validation_list[i].offset;
-      }
-   }
-
-   if (ret == 0 && out_fence != NULL)
-      *out_fence = execbuf.rsvd2 >> 32;
-
-   return ret;
-}
-
-static int
-submit_batch(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
-{
-   struct brw_batch *batch = &brw->batch;
-   int ret = 0;
-
-   if (batch->use_shadow_copy) {
-      void *bo_map = brw_bo_map(brw, batch->batch.bo, MAP_WRITE);
-      memcpy(bo_map, batch->batch.map, 4 * USED_BATCH(*batch));
-
-      bo_map = brw_bo_map(brw, batch->state.bo, MAP_WRITE);
-      memcpy(bo_map, batch->state.map, batch->state_used);
-   }
-
-   brw_bo_unmap(batch->batch.bo);
-   brw_bo_unmap(batch->state.bo);
-
-   if (!brw->screen->devinfo.no_hw) {
-      /* The requirements for using I915_EXEC_NO_RELOC are:
-       *
-       *   The addresses written in the objects must match the corresponding
-       *   reloc.gtt_offset which in turn must match the corresponding
-       *   execobject.offset.
-       *
-       *   Any render targets written to in the batch must be flagged with
-       *   EXEC_OBJECT_WRITE.
-       *
-       *   To avoid stalling, execobject.offset should match the current
-       *   address of that object within the active context.
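The NO_RELOC contract described above amounts to a small piece of uAPI plumbing. A minimal standalone sketch, not part of the deleted file; only the i915 uAPI types, the flags, and drmIoctl() are real, while buffer setup and error handling are elided:

   #include <errno.h>
   #include <stdint.h>
   #include <drm/i915_drm.h>
   #include <xf86drm.h>

   /* Submit a validation list with I915_EXEC_NO_RELOC. This is only legal
    * when every reloc.presumed_offset already equals the matching
    * execobject.offset, and every written render target is flagged with
    * EXEC_OBJECT_WRITE, exactly as the comment above requires. */
   static int
   submit_no_reloc(int fd, uint32_t ctx_id,
                   struct drm_i915_gem_exec_object2 *objs, uint32_t count,
                   uint32_t batch_len)
   {
      struct drm_i915_gem_execbuffer2 execbuf = {
         .buffers_ptr = (uintptr_t) objs,
         .buffer_count = count,
         .batch_len = batch_len,
         .flags = I915_EXEC_RENDER | I915_EXEC_NO_RELOC,
         .rsvd1 = ctx_id, /* context ID */
      };
      return drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf) ? -errno : 0;
   }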
- */ - int flags = I915_EXEC_NO_RELOC | I915_EXEC_RENDER; - - if (batch->needs_sol_reset) - flags |= I915_EXEC_GEN7_SOL_RESET; - - /* Set statebuffer relocations */ - const unsigned state_index = batch->state.bo->index; - if (state_index < batch->exec_count && - batch->exec_bos[state_index] == batch->state.bo) { - struct drm_i915_gem_exec_object2 *entry = - &batch->validation_list[state_index]; - assert(entry->handle == batch->state.bo->gem_handle); - entry->relocation_count = batch->state_relocs.reloc_count; - entry->relocs_ptr = (uintptr_t) batch->state_relocs.relocs; - } - - /* Set batchbuffer relocations */ - struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[0]; - assert(entry->handle == batch->batch.bo->gem_handle); - entry->relocation_count = batch->batch_relocs.reloc_count; - entry->relocs_ptr = (uintptr_t) batch->batch_relocs.relocs; - - if (batch->use_batch_first) { - flags |= I915_EXEC_BATCH_FIRST | I915_EXEC_HANDLE_LUT; - } else { - /* Move the batch to the end of the validation list */ - struct drm_i915_gem_exec_object2 tmp; - struct brw_bo *tmp_bo; - const unsigned index = batch->exec_count - 1; - - tmp = *entry; - *entry = batch->validation_list[index]; - batch->validation_list[index] = tmp; - - tmp_bo = batch->exec_bos[0]; - batch->exec_bos[0] = batch->exec_bos[index]; - batch->exec_bos[index] = tmp_bo; - } - - ret = execbuffer(brw->screen->fd, batch, brw->hw_ctx, - 4 * USED_BATCH(*batch), - in_fence_fd, out_fence_fd, flags); - - throttle(brw); - } - - if (INTEL_DEBUG(DEBUG_BATCH)) { - intel_print_batch(&batch->decoder, batch->batch.map, - 4 * USED_BATCH(*batch), - batch->batch.bo->gtt_offset, false); - } - - if (brw->ctx.Const.ResetStrategy == GL_LOSE_CONTEXT_ON_RESET_ARB) - brw_check_for_reset(brw); - - if (ret != 0) { - fprintf(stderr, "i965: Failed to submit batchbuffer: %s\n", - strerror(-ret)); - abort(); - } - - return ret; -} - -/** - * The in_fence_fd is ignored if -1. Otherwise this function takes ownership - * of the fd. - * - * The out_fence_fd is ignored if NULL. Otherwise, the caller takes ownership - * of the returned fd. - */ -int -_brw_batch_flush_fence(struct brw_context *brw, - int in_fence_fd, int *out_fence_fd, - const char *file, int line) -{ - int ret; - - if (USED_BATCH(brw->batch) == 0 && !brw->batch.contains_fence_signal) - return 0; - - /* Check that we didn't just wrap our batchbuffer at a bad time. 
*/
-   assert(!brw->batch.no_wrap);
-
-   brw_finish_batch(brw);
-   brw_upload_finish(&brw->upload);
-
-   finish_growing_bos(&brw->batch.batch);
-   finish_growing_bos(&brw->batch.state);
-
-   if (brw->throttle_batch[0] == NULL) {
-      brw->throttle_batch[0] = brw->batch.batch.bo;
-      brw_bo_reference(brw->throttle_batch[0]);
-   }
-
-   if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_SUBMIT)) {
-      int bytes_for_commands = 4 * USED_BATCH(brw->batch);
-      int bytes_for_state = brw->batch.state_used;
-      fprintf(stderr, "%19s:%-3d: Batchbuffer flush with %5db (%0.1f%%) (pkt),"
-              " %5db (%0.1f%%) (state), %4d BOs (%0.1fMb aperture),"
-              " %4d batch relocs, %4d state relocs\n", file, line,
-              bytes_for_commands, 100.0f * bytes_for_commands / BATCH_SZ,
-              bytes_for_state, 100.0f * bytes_for_state / STATE_SZ,
-              brw->batch.exec_count,
-              (float) (brw->batch.aperture_space / (1024 * 1024)),
-              brw->batch.batch_relocs.reloc_count,
-              brw->batch.state_relocs.reloc_count);
-
-      dump_validation_list(&brw->batch);
-   }
-
-   ret = submit_batch(brw, in_fence_fd, out_fence_fd);
-
-   if (INTEL_DEBUG(DEBUG_SYNC)) {
-      fprintf(stderr, "waiting for idle\n");
-      brw_bo_wait_rendering(brw->batch.batch.bo);
-   }
-
-   /* Start a new batch buffer. */
-   brw_new_batch(brw);
-
-   return ret;
-}
-
-void
-brw_batch_maybe_noop(struct brw_context *brw)
-{
-   if (!brw->frontend_noop || USED_BATCH(brw->batch) != 0)
-      return;
-
-   BEGIN_BATCH(1);
-   OUT_BATCH(MI_BATCH_BUFFER_END);
-   ADVANCE_BATCH();
-}
-
-bool
-brw_batch_references(struct brw_batch *batch, struct brw_bo *bo)
-{
-   unsigned index = READ_ONCE(bo->index);
-   if (index < batch->exec_count && batch->exec_bos[index] == bo)
-      return true;
-
-   for (int i = 0; i < batch->exec_count; i++) {
-      if (batch->exec_bos[i] == bo)
-         return true;
-   }
-   return false;
-}
-
-/* This is the only way buffers get added to the validate list.
- */
-static uint64_t
-emit_reloc(struct brw_batch *batch,
-           struct brw_reloc_list *rlist, uint32_t offset,
-           struct brw_bo *target, int32_t target_offset,
-           unsigned int reloc_flags)
-{
-   assert(target != NULL);
-
-   if (target->kflags & EXEC_OBJECT_PINNED) {
-      brw_use_pinned_bo(batch, target, reloc_flags & RELOC_WRITE);
-      return intel_canonical_address(target->gtt_offset + target_offset);
-   }
-
-   unsigned int index = add_exec_bo(batch, target);
-   struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[index];
-
-   if (rlist->reloc_count == rlist->reloc_array_size) {
-      rlist->reloc_array_size *= 2;
-      rlist->relocs = realloc(rlist->relocs,
-                              rlist->reloc_array_size *
-                              sizeof(struct drm_i915_gem_relocation_entry));
-   }
-
-   if (reloc_flags & RELOC_32BIT) {
-      /* Restrict this buffer to the low 32 bits of the address space.
-       *
-       * Altering the validation list flags restricts it for this batch,
-       * but we also alter the BO's kflags to restrict it permanently
-       * (until the BO is destroyed and put back in the cache). Buffers
-       * may stay bound across batches, and we want to keep it constrained.
-       */
-      target->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
-      entry->flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
-
-      /* RELOC_32BIT is not an EXEC_OBJECT_* flag, so get rid of it. */
-      reloc_flags &= ~RELOC_32BIT;
-   }
-
-   if (reloc_flags)
-      entry->flags |= reloc_flags & batch->valid_reloc_flags;
-
-   rlist->relocs[rlist->reloc_count++] =
-      (struct drm_i915_gem_relocation_entry) {
-         .offset = offset,
-         .delta = target_offset,
-         .target_handle = batch->use_batch_first ?
index : target->gem_handle, - .presumed_offset = entry->offset, - }; - - /* Using the old buffer offset, write in what the right data would be, in - * case the buffer doesn't move and we can short-circuit the relocation - * processing in the kernel - */ - return entry->offset + target_offset; -} - -void -brw_use_pinned_bo(struct brw_batch *batch, struct brw_bo *bo, - unsigned writable_flag) -{ - assert(bo->kflags & EXEC_OBJECT_PINNED); - assert((writable_flag & ~EXEC_OBJECT_WRITE) == 0); - - unsigned int index = add_exec_bo(batch, bo); - struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[index]; - assert(entry->offset == bo->gtt_offset); - - if (writable_flag) - entry->flags |= EXEC_OBJECT_WRITE; -} - -uint64_t -brw_batch_reloc(struct brw_batch *batch, uint32_t batch_offset, - struct brw_bo *target, uint32_t target_offset, - unsigned int reloc_flags) -{ - assert(batch_offset <= batch->batch.bo->size - sizeof(uint32_t)); - - return emit_reloc(batch, &batch->batch_relocs, batch_offset, - target, target_offset, reloc_flags); -} - -uint64_t -brw_state_reloc(struct brw_batch *batch, uint32_t state_offset, - struct brw_bo *target, uint32_t target_offset, - unsigned int reloc_flags) -{ - assert(state_offset <= batch->state.bo->size - sizeof(uint32_t)); - - return emit_reloc(batch, &batch->state_relocs, state_offset, - target, target_offset, reloc_flags); -} - -/** - * Reserve some space in the statebuffer, or flush. - * - * This is used to estimate when we're near the end of the batch, - * so we can flush early. - */ -void -brw_require_statebuffer_space(struct brw_context *brw, int size) -{ - if (brw->batch.state_used + size >= STATE_SZ) - brw_batch_flush(brw); -} - -/** - * Allocates a block of space in the batchbuffer for indirect state. - */ -void * -brw_state_batch(struct brw_context *brw, - int size, - int alignment, - uint32_t *out_offset) -{ - struct brw_batch *batch = &brw->batch; - - assert(size < batch->state.bo->size); - - uint32_t offset = ALIGN(batch->state_used, alignment); - - if (offset + size >= STATE_SZ && !batch->no_wrap) { - brw_batch_flush(brw); - offset = ALIGN(batch->state_used, alignment); - } else if (offset + size >= batch->state.bo->size) { - const unsigned new_size = - MIN2(batch->state.bo->size + batch->state.bo->size / 2, - MAX_STATE_SIZE); - grow_buffer(brw, &batch->state, batch->state_used, new_size); - assert(offset + size < batch->state.bo->size); - } - - if (INTEL_DEBUG(DEBUG_BATCH)) { - _mesa_hash_table_u64_insert(batch->state_batch_sizes, - offset, (void *) (uintptr_t) size); - } - - batch->state_used = offset + size; - - *out_offset = offset; - return batch->state.map + (offset >> 2); -} - -void -brw_batch_data(struct brw_context *brw, - const void *data, GLuint bytes) -{ - assert((bytes & 3) == 0); - brw_batch_require_space(brw, bytes); - memcpy(brw->batch.map_next, data, bytes); - brw->batch.map_next += bytes >> 2; -} - -static void -load_sized_register_mem(struct brw_context *brw, - uint32_t reg, - struct brw_bo *bo, - uint32_t offset, - int size) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - int i; - - /* MI_LOAD_REGISTER_MEM only exists on Gfx7+. 
*/ - assert(devinfo->ver >= 7); - - if (devinfo->ver >= 8) { - BEGIN_BATCH(4 * size); - for (i = 0; i < size; i++) { - OUT_BATCH(GFX7_MI_LOAD_REGISTER_MEM | (4 - 2)); - OUT_BATCH(reg + i * 4); - OUT_RELOC64(bo, 0, offset + i * 4); - } - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(3 * size); - for (i = 0; i < size; i++) { - OUT_BATCH(GFX7_MI_LOAD_REGISTER_MEM | (3 - 2)); - OUT_BATCH(reg + i * 4); - OUT_RELOC(bo, 0, offset + i * 4); - } - ADVANCE_BATCH(); - } -} - -void -brw_load_register_mem(struct brw_context *brw, - uint32_t reg, - struct brw_bo *bo, - uint32_t offset) -{ - load_sized_register_mem(brw, reg, bo, offset, 1); -} - -void -brw_load_register_mem64(struct brw_context *brw, - uint32_t reg, - struct brw_bo *bo, - uint32_t offset) -{ - load_sized_register_mem(brw, reg, bo, offset, 2); -} - -/* - * Write an arbitrary 32-bit register to a buffer via MI_STORE_REGISTER_MEM. - */ -void -brw_store_register_mem32(struct brw_context *brw, - struct brw_bo *bo, uint32_t reg, uint32_t offset) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - assert(devinfo->ver >= 6); - - if (devinfo->ver >= 8) { - BEGIN_BATCH(4); - OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2)); - OUT_BATCH(reg); - OUT_RELOC64(bo, RELOC_WRITE, offset); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(3); - OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); - OUT_BATCH(reg); - OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset); - ADVANCE_BATCH(); - } -} - -/* - * Write an arbitrary 64-bit register to a buffer via MI_STORE_REGISTER_MEM. - */ -void -brw_store_register_mem64(struct brw_context *brw, - struct brw_bo *bo, uint32_t reg, uint32_t offset) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - assert(devinfo->ver >= 6); - - /* MI_STORE_REGISTER_MEM only stores a single 32-bit value, so to - * read a full 64-bit register, we need to do two of them. - */ - if (devinfo->ver >= 8) { - BEGIN_BATCH(8); - OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2)); - OUT_BATCH(reg); - OUT_RELOC64(bo, RELOC_WRITE, offset); - OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2)); - OUT_BATCH(reg + sizeof(uint32_t)); - OUT_RELOC64(bo, RELOC_WRITE, offset + sizeof(uint32_t)); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(6); - OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); - OUT_BATCH(reg); - OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset); - OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); - OUT_BATCH(reg + sizeof(uint32_t)); - OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset + sizeof(uint32_t)); - ADVANCE_BATCH(); - } -} - -/* - * Write a 32-bit register using immediate data. - */ -void -brw_load_register_imm32(struct brw_context *brw, uint32_t reg, uint32_t imm) -{ - assert(brw->screen->devinfo.ver >= 6); - - BEGIN_BATCH(3); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); - OUT_BATCH(reg); - OUT_BATCH(imm); - ADVANCE_BATCH(); -} - -/* - * Write a 64-bit register using immediate data. - */ -void -brw_load_register_imm64(struct brw_context *brw, uint32_t reg, uint64_t imm) -{ - assert(brw->screen->devinfo.ver >= 6); - - BEGIN_BATCH(5); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (5 - 2)); - OUT_BATCH(reg); - OUT_BATCH(imm & 0xffffffff); - OUT_BATCH(reg + 4); - OUT_BATCH(imm >> 32); - ADVANCE_BATCH(); -} - -/* - * Copies a 32-bit register. - */ -void -brw_load_register_reg(struct brw_context *brw, uint32_t dest, uint32_t src) -{ - assert(brw->screen->devinfo.verx10 >= 75); - - BEGIN_BATCH(3); - OUT_BATCH(MI_LOAD_REGISTER_REG | (3 - 2)); - OUT_BATCH(src); - OUT_BATCH(dest); - ADVANCE_BATCH(); -} - -/* - * Copies a 64-bit register. 
- */
-void
-brw_load_register_reg64(struct brw_context *brw, uint32_t dest, uint32_t src)
-{
-   assert(brw->screen->devinfo.verx10 >= 75);
-
-   BEGIN_BATCH(6);
-   OUT_BATCH(MI_LOAD_REGISTER_REG | (3 - 2));
-   OUT_BATCH(src);
-   OUT_BATCH(dest);
-   OUT_BATCH(MI_LOAD_REGISTER_REG | (3 - 2));
-   OUT_BATCH(src + sizeof(uint32_t));
-   OUT_BATCH(dest + sizeof(uint32_t));
-   ADVANCE_BATCH();
-}
-
-/*
- * Write 32-bits of immediate data to a GPU memory buffer.
- */
-void
-brw_store_data_imm32(struct brw_context *brw, struct brw_bo *bo,
-                     uint32_t offset, uint32_t imm)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   assert(devinfo->ver >= 6);
-
-   BEGIN_BATCH(4);
-   OUT_BATCH(MI_STORE_DATA_IMM | (4 - 2));
-   if (devinfo->ver >= 8)
-      OUT_RELOC64(bo, RELOC_WRITE, offset);
-   else {
-      OUT_BATCH(0); /* MBZ */
-      OUT_RELOC(bo, RELOC_WRITE, offset);
-   }
-   OUT_BATCH(imm);
-   ADVANCE_BATCH();
-}
-
-/*
- * Write 64-bits of immediate data to a GPU memory buffer.
- */
-void
-brw_store_data_imm64(struct brw_context *brw, struct brw_bo *bo,
-                     uint32_t offset, uint64_t imm)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   assert(devinfo->ver >= 6);
-
-   BEGIN_BATCH(5);
-   OUT_BATCH(MI_STORE_DATA_IMM | (5 - 2));
-   if (devinfo->ver >= 8)
-      OUT_RELOC64(bo, RELOC_WRITE, offset);
-   else {
-      OUT_BATCH(0); /* MBZ */
-      OUT_RELOC(bo, RELOC_WRITE, offset);
-   }
-   OUT_BATCH(imm & 0xffffffffu);
-   OUT_BATCH(imm >> 32);
-   ADVANCE_BATCH();
-}
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
deleted file mode 100644
index 39b383b..0000000
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ /dev/null
@@ -1,166 +0,0 @@
-#ifndef BRW_BATCH_H
-#define BRW_BATCH_H
-
-#include "main/mtypes.h"
-
-#include "brw_context.h"
-#include "brw_bufmgr.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* The kernel assumes batchbuffers are smaller than 256kB. */
-#define MAX_BATCH_SIZE (256 * 1024)
-
-/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base
- * Address, which means that we can't put binding tables beyond 64kB. This
- * effectively limits the maximum statebuffer size to 64kB.
- */
-#define MAX_STATE_SIZE (64 * 1024)
-
-struct brw_batch;
-
-void brw_batch_init(struct brw_context *brw);
-void brw_batch_free(struct brw_batch *batch);
-void brw_batch_save_state(struct brw_context *brw);
-bool brw_batch_saved_state_is_empty(struct brw_context *brw);
-void brw_batch_reset_to_saved(struct brw_context *brw);
-void brw_batch_require_space(struct brw_context *brw, GLuint sz);
-int _brw_batch_flush_fence(struct brw_context *brw,
-                           int in_fence_fd, int *out_fence_fd,
-                           const char *file, int line);
-void brw_batch_maybe_noop(struct brw_context *brw);
-
-#define brw_batch_flush(brw) \
-   _brw_batch_flush_fence((brw), -1, NULL, __FILE__, __LINE__)
-
-#define brw_batch_flush_fence(brw, in_fence_fd, out_fence_fd) \
-   _brw_batch_flush_fence((brw), (in_fence_fd), (out_fence_fd), \
-                          __FILE__, __LINE__)
-
-/* Unlike bmBufferData, this currently requires that the buffer be mapped.
- * Consider it a convenience function wrapping multiple
- * brw_buffer_dword() calls.
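As a usage note for the flush macros just declared, a hedged sketch of the fence-fd flow (hypothetical caller; sync_wait() is assumed from libsync, and the fd ownership rules are the ones documented at _brw_batch_flush_fence() above):

   /* Flush with no input fence; on success the caller owns out_fence_fd
    * and must close() it once it is done waiting. */
   int out_fence_fd = -1;
   if (brw_batch_flush_fence(brw, -1, &out_fence_fd) == 0 &&
       out_fence_fd >= 0) {
      sync_wait(out_fence_fd, -1); /* block until the batch retires */
      close(out_fence_fd);
   }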
- */ -void brw_batch_data(struct brw_context *brw, - const void *data, GLuint bytes); - -static inline bool -brw_batch_has_aperture_space(struct brw_context *brw, uint64_t extra_space) -{ - return brw->batch.aperture_space + extra_space <= - brw->screen->aperture_threshold; -} - -bool brw_batch_references(struct brw_batch *batch, struct brw_bo *bo); - -#define RELOC_WRITE EXEC_OBJECT_WRITE -#define RELOC_NEEDS_GGTT EXEC_OBJECT_NEEDS_GTT -/* Inverted meaning, but using the same bit...emit_reloc will flip it. */ -#define RELOC_32BIT EXEC_OBJECT_SUPPORTS_48B_ADDRESS - -void brw_use_pinned_bo(struct brw_batch *batch, struct brw_bo *bo, - unsigned writeable_flag); - -uint64_t brw_batch_reloc(struct brw_batch *batch, - uint32_t batch_offset, - struct brw_bo *target, - uint32_t target_offset, - unsigned flags); -uint64_t brw_state_reloc(struct brw_batch *batch, - uint32_t batch_offset, - struct brw_bo *target, - uint32_t target_offset, - unsigned flags); - -#define USED_BATCH(_batch) \ - ((uintptr_t)((_batch).map_next - (_batch).batch.map)) - -static inline uint32_t float_as_int(float f) -{ - union { - float f; - uint32_t d; - } fi; - - fi.f = f; - return fi.d; -} - -static inline void -brw_batch_begin(struct brw_context *brw, int n) -{ - brw_batch_require_space(brw, n * 4); - -#ifdef DEBUG - brw->batch.emit = USED_BATCH(brw->batch); - brw->batch.total = n; -#endif -} - -static inline void -brw_batch_advance(struct brw_context *brw) -{ -#ifdef DEBUG - struct brw_batch *batch = &brw->batch; - unsigned int _n = USED_BATCH(*batch) - batch->emit; - assert(batch->total != 0); - if (_n != batch->total) { - fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", - _n, batch->total); - abort(); - } - batch->total = 0; -#else - (void) brw; -#endif -} - -static inline bool -brw_ptr_in_state_buffer(struct brw_batch *batch, void *p) -{ - return (char *) p >= (char *) batch->state.map && - (char *) p < (char *) batch->state.map + batch->state.bo->size; -} - -#define BEGIN_BATCH(n) do { \ - brw_batch_begin(brw, (n)); \ - uint32_t *__map = brw->batch.map_next; \ - brw->batch.map_next += (n) - -#define BEGIN_BATCH_BLT(n) do { \ - assert(brw->screen->devinfo.ver < 6); \ - brw_batch_begin(brw, (n)); \ - uint32_t *__map = brw->batch.map_next; \ - brw->batch.map_next += (n) - -#define OUT_BATCH(d) *__map++ = (d) -#define OUT_BATCH_F(f) OUT_BATCH(float_as_int((f))) - -#define OUT_RELOC(buf, flags, delta) do { \ - uint32_t __offset = (__map - brw->batch.batch.map) * 4; \ - uint32_t reloc = \ - brw_batch_reloc(&brw->batch, __offset, (buf), (delta), (flags)); \ - OUT_BATCH(reloc); \ -} while (0) - -/* Handle 48-bit address relocations for Gfx8+ */ -#define OUT_RELOC64(buf, flags, delta) do { \ - uint32_t __offset = (__map - brw->batch.batch.map) * 4; \ - uint64_t reloc64 = \ - brw_batch_reloc(&brw->batch, __offset, (buf), (delta), (flags)); \ - OUT_BATCH(reloc64); \ - OUT_BATCH(reloc64 >> 32); \ -} while (0) - -#define ADVANCE_BATCH() \ - assert(__map == brw->batch.map_next); \ - brw_batch_advance(brw); \ -} while (0) - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c deleted file mode 100644 index 8ecdcc5..0000000 --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the 
Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * \file brw_binding_tables.c - * - * State atoms which upload the "binding table" for each shader stage. - * - * Binding tables map a numeric "surface index" to the SURFACE_STATE structure - * for a currently bound surface. This allows SEND messages (such as sampler - * or data port messages) to refer to a particular surface by number, rather - * than by pointer. - * - * The binding table is stored as a (sparse) array of SURFACE_STATE entries; - * surface indexes are simply indexes into the array. The ordering of the - * entries is entirely left up to software; see the SURF_INDEX_* macros in - * brw_context.h to see our current layout. - */ - -#include "main/mtypes.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "brw_batch.h" - -/** - * Upload a shader stage's binding table as indirect state. - * - * This copies brw_stage_state::surf_offset[] into the indirect state section - * of the batchbuffer (allocated by brw_state_batch()). - */ -void -brw_upload_binding_table(struct brw_context *brw, - uint32_t packet_name, - const struct brw_stage_prog_data *prog_data, - struct brw_stage_state *stage_state) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (prog_data->binding_table.size_bytes == 0) { - /* There are no surfaces; skip making the binding table altogether. */ - if (stage_state->bind_bo_offset == 0 && devinfo->ver < 9) - return; - - stage_state->bind_bo_offset = 0; - } else { - /* Upload a new binding table. */ - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) { - brw_emit_buffer_surface_state( - brw, &stage_state->surf_offset[ - prog_data->binding_table.shader_time_start], - brw->shader_time.bo, 0, ISL_FORMAT_RAW, - brw->shader_time.bo->size, 1, RELOC_WRITE); - } - uint32_t *bind = - brw_state_batch(brw, prog_data->binding_table.size_bytes, - 32, &stage_state->bind_bo_offset); - - /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */ - memcpy(bind, stage_state->surf_offset, - prog_data->binding_table.size_bytes); - } - - brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS; - - if (devinfo->ver >= 7) { - BEGIN_BATCH(2); - OUT_BATCH(packet_name << 16 | (2 - 2)); - /* Align SurfaceStateOffset[16:6] format to [15:5] PS Binding Table field - * when hw-generated binding table is enabled. - */ - OUT_BATCH(stage_state->bind_bo_offset); - ADVANCE_BATCH(); - } -} - -/** - * State atoms which upload the binding table for a particular shader stage. - * @{ - */ - -/** Upload the VS binding table. 
*/ -static void -brw_vs_upload_binding_table(struct brw_context *brw) -{ - /* BRW_NEW_VS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data; - brw_upload_binding_table(brw, - _3DSTATE_BINDING_TABLE_POINTERS_VS, - prog_data, - &brw->vs.base); -} - -const struct brw_tracked_state brw_vs_binding_table = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VS_CONSTBUF | - BRW_NEW_VS_PROG_DATA | - BRW_NEW_SURFACES, - }, - .emit = brw_vs_upload_binding_table, -}; - - -/** Upload the PS binding table. */ -static void -brw_upload_wm_binding_table(struct brw_context *brw) -{ - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data; - brw_upload_binding_table(brw, - _3DSTATE_BINDING_TABLE_POINTERS_PS, - prog_data, - &brw->wm.base); -} - -const struct brw_tracked_state brw_wm_binding_table = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_SURFACES, - }, - .emit = brw_upload_wm_binding_table, -}; - -/** Upload the TCS binding table (if tessellation stages are active). */ -static void -brw_tcs_upload_binding_table(struct brw_context *brw) -{ - /* Skip if the tessellation stages are disabled. */ - if (brw->programs[MESA_SHADER_TESS_EVAL] == NULL) - return; - - /* BRW_NEW_TCS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data; - brw_upload_binding_table(brw, - _3DSTATE_BINDING_TABLE_POINTERS_HS, - prog_data, - &brw->tcs.base); -} - -const struct brw_tracked_state brw_tcs_binding_table = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_DEFAULT_TESS_LEVELS | - BRW_NEW_SURFACES | - BRW_NEW_TCS_CONSTBUF | - BRW_NEW_TCS_PROG_DATA, - }, - .emit = brw_tcs_upload_binding_table, -}; - -/** Upload the TES binding table (if TES is active). */ -static void -brw_tes_upload_binding_table(struct brw_context *brw) -{ - /* If there's no TES, skip changing anything. */ - if (brw->programs[MESA_SHADER_TESS_EVAL] == NULL) - return; - - /* BRW_NEW_TES_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data; - brw_upload_binding_table(brw, - _3DSTATE_BINDING_TABLE_POINTERS_DS, - prog_data, - &brw->tes.base); -} - -const struct brw_tracked_state brw_tes_binding_table = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_SURFACES | - BRW_NEW_TES_CONSTBUF | - BRW_NEW_TES_PROG_DATA, - }, - .emit = brw_tes_upload_binding_table, -}; - -/** Upload the GS binding table (if GS is active). */ -static void -brw_gs_upload_binding_table(struct brw_context *brw) -{ - /* If there's no GS, skip changing anything. */ - if (brw->programs[MESA_SHADER_GEOMETRY] == NULL) - return; - - /* BRW_NEW_GS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data; - brw_upload_binding_table(brw, - _3DSTATE_BINDING_TABLE_POINTERS_GS, - prog_data, - &brw->gs.base); -} - -const struct brw_tracked_state brw_gs_binding_table = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_GS_CONSTBUF | - BRW_NEW_GS_PROG_DATA | - BRW_NEW_SURFACES, - }, - .emit = brw_gs_upload_binding_table, -}; -/** @} */ - -/** - * State atoms which emit 3DSTATE packets to update the binding table pointers. - * @{ - */ - -/** - * (Gfx4-5) Upload the binding table pointers for all shader stages. - * - * The binding table pointers are relative to the surface state base address, - * which points at the batchbuffer containing the streamed batch state. 
- */ -static void -gfx4_upload_binding_table_pointers(struct brw_context *brw) -{ - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2)); - OUT_BATCH(brw->vs.base.bind_bo_offset); - OUT_BATCH(0); /* gs */ - OUT_BATCH(0); /* clip */ - OUT_BATCH(0); /* sf */ - OUT_BATCH(brw->wm.base.bind_bo_offset); - ADVANCE_BATCH(); -} - -const struct brw_tracked_state brw_binding_table_pointers = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_BINDING_TABLE_POINTERS | - BRW_NEW_STATE_BASE_ADDRESS, - }, - .emit = gfx4_upload_binding_table_pointers, -}; - -/** - * (Sandybridge Only) Upload the binding table pointers for all shader stages. - * - * The binding table pointers are relative to the surface state base address, - * which points at the batchbuffer containing the streamed batch state. - */ -static void -gfx6_upload_binding_table_pointers(struct brw_context *brw) -{ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | - GFX6_BINDING_TABLE_MODIFY_VS | - GFX6_BINDING_TABLE_MODIFY_GS | - GFX6_BINDING_TABLE_MODIFY_PS | - (4 - 2)); - OUT_BATCH(brw->vs.base.bind_bo_offset); /* vs */ - if (brw->ff_gs.prog_active) - OUT_BATCH(brw->ff_gs.bind_bo_offset); /* gs */ - else - OUT_BATCH(brw->gs.base.bind_bo_offset); /* gs */ - OUT_BATCH(brw->wm.base.bind_bo_offset); /* wm/ps */ - ADVANCE_BATCH(); -} - -const struct brw_tracked_state gfx6_binding_table_pointers = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_BINDING_TABLE_POINTERS | - BRW_NEW_STATE_BASE_ADDRESS, - }, - .emit = gfx6_upload_binding_table_pointers, -}; - -/** @} */ diff --git a/src/mesa/drivers/dri/i965/brw_blit.c b/src/mesa/drivers/dri/i965/brw_blit.c deleted file mode 100644 index 95f00e9..0000000 --- a/src/mesa/drivers/dri/i965/brw_blit.c +++ /dev/null @@ -1,790 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "main/mtypes.h" -#include "main/blit.h" -#include "main/context.h" -#include "main/enums.h" -#include "main/fbobject.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_blit.h" -#include "brw_buffers.h" -#include "brw_fbo.h" -#include "brw_batch.h" -#include "brw_mipmap_tree.h" - -#define FILE_DEBUG_FLAG DEBUG_BLIT - -static void -brw_miptree_set_alpha_to_one(struct brw_context *brw, - struct brw_mipmap_tree *mt, - int x, int y, int width, int height); - -static GLuint translate_raster_op(enum gl_logicop_mode logicop) -{ - return logicop | (logicop << 4); -} - -static uint32_t -br13_for_cpp(int cpp) -{ - switch (cpp) { - case 16: - return BR13_32323232; - case 8: - return BR13_16161616; - case 4: - return BR13_8888; - case 2: - return BR13_565; - case 1: - return BR13_8; - default: - unreachable("not reached"); - } -} - -/** - * Emits the packet for switching the blitter from X to Y tiled or back. - * - * This has to be called in a single BEGIN_BATCH_BLT_TILED() / - * ADVANCE_BATCH_TILED(). This is because BCS_SWCTRL is saved and restored as - * part of the power context, not a render context, and if the batchbuffer was - * to get flushed between setting and blitting, or blitting and restoring, our - * tiling state would leak into other unsuspecting applications (like the X - * server). - */ -static uint32_t * -set_blitter_tiling(struct brw_context *brw, - bool dst_y_tiled, bool src_y_tiled, - uint32_t *__map) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const unsigned n_dwords = devinfo->ver >= 8 ? 5 : 4; - assert(devinfo->ver >= 6); - - /* Idle the blitter before we update how tiling is interpreted. */ - OUT_BATCH(MI_FLUSH_DW | (n_dwords - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - if (n_dwords == 5) - OUT_BATCH(0); - - OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); - OUT_BATCH(BCS_SWCTRL); - OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 | - (dst_y_tiled ? BCS_SWCTRL_DST_Y : 0) | - (src_y_tiled ? BCS_SWCTRL_SRC_Y : 0)); - return __map; -} -#define SET_BLITTER_TILING(...) __map = set_blitter_tiling(__VA_ARGS__, __map) - -#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) \ - unsigned set_tiling_batch_size = 0; \ - if (dst_y_tiled || src_y_tiled) { \ - if (devinfo->ver >= 8) \ - set_tiling_batch_size = 16; \ - else \ - set_tiling_batch_size = 14; \ - } \ - BEGIN_BATCH_BLT(n + set_tiling_batch_size); \ - if (dst_y_tiled || src_y_tiled) \ - SET_BLITTER_TILING(brw, dst_y_tiled, src_y_tiled) - -#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) \ - if (dst_y_tiled || src_y_tiled) \ - SET_BLITTER_TILING(brw, false, false); \ - ADVANCE_BATCH() - -bool -brw_miptree_blit_compatible_formats(mesa_format src, mesa_format dst) -{ - /* The BLT doesn't handle sRGB conversion */ - assert(src == _mesa_get_srgb_format_linear(src)); - assert(dst == _mesa_get_srgb_format_linear(dst)); - - /* No swizzle or format conversions possible, except... 
*/ - if (src == dst) - return true; - - /* ...we can either discard the alpha channel when going from A->X, - * or we can fill the alpha channel with 0xff when going from X->A - */ - if (src == MESA_FORMAT_B8G8R8A8_UNORM || src == MESA_FORMAT_B8G8R8X8_UNORM) - return (dst == MESA_FORMAT_B8G8R8A8_UNORM || - dst == MESA_FORMAT_B8G8R8X8_UNORM); - - if (src == MESA_FORMAT_R8G8B8A8_UNORM || src == MESA_FORMAT_R8G8B8X8_UNORM) - return (dst == MESA_FORMAT_R8G8B8A8_UNORM || - dst == MESA_FORMAT_R8G8B8X8_UNORM); - - /* We can also discard alpha when going from A2->X2 for 2 bit alpha, - * however we can't fill the alpha channel with two 1 bits when going - * from X2->A2, because brw_miptree_set_alpha_to_one() is not yet - * ready for this / can only handle 8 bit alpha. - */ - if (src == MESA_FORMAT_B10G10R10A2_UNORM) - return (dst == MESA_FORMAT_B10G10R10A2_UNORM || - dst == MESA_FORMAT_B10G10R10X2_UNORM); - - if (src == MESA_FORMAT_R10G10B10A2_UNORM) - return (dst == MESA_FORMAT_R10G10B10A2_UNORM || - dst == MESA_FORMAT_R10G10B10X2_UNORM); - - return false; -} - -static void -get_blit_intratile_offset_el(const struct brw_context *brw, - struct brw_mipmap_tree *mt, - uint32_t total_x_offset_el, - uint32_t total_y_offset_el, - uint64_t *tile_offset_B, - uint32_t *x_offset_el, - uint32_t *y_offset_el) -{ - ASSERTED uint32_t z_offset_el, array_offset; - isl_tiling_get_intratile_offset_el(mt->surf.tiling, mt->surf.dim, - mt->surf.msaa_layout, - mt->cpp * 8, mt->surf.samples, - mt->surf.row_pitch_B, - mt->surf.array_pitch_el_rows, - total_x_offset_el, total_y_offset_el, 0, 0, - tile_offset_B, - x_offset_el, y_offset_el, - &z_offset_el, &array_offset); - assert(z_offset_el == 0); - assert(array_offset == 0); - - if (mt->surf.tiling == ISL_TILING_LINEAR) { - /* From the Broadwell PRM docs for XY_SRC_COPY_BLT::SourceBaseAddress: - * - * "Base address of the destination surface: X=0, Y=0. Lower 32bits - * of the 48bit addressing. When Src Tiling is enabled (Bit_15 - * enabled), this address must be 4KB-aligned. When Tiling is not - * enabled, this address should be CL (64byte) aligned." - * - * The offsets we get from ISL in the tiled case are already aligned. - * In the linear case, we need to do some of our own aligning. - */ - uint32_t delta = *tile_offset_B & 63; - assert(delta % mt->cpp == 0); - *tile_offset_B -= delta; - *x_offset_el += delta / mt->cpp; - } else { - assert(*tile_offset_B % 4096 == 0); - } -} - -static bool -alignment_valid(struct brw_context *brw, unsigned offset, - enum isl_tiling tiling) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* Tiled buffers must be page-aligned (4K). */ - if (tiling != ISL_TILING_LINEAR) - return (offset & 4095) == 0; - - /* On Gfx8+, linear buffers must be cacheline-aligned. 
*/ - if (devinfo->ver >= 8) - return (offset & 63) == 0; - - return true; -} - -static uint32_t -xy_blit_cmd(enum isl_tiling src_tiling, enum isl_tiling dst_tiling, - uint32_t cpp) -{ - uint32_t CMD = 0; - - assert(cpp <= 4); - switch (cpp) { - case 1: - case 2: - CMD = XY_SRC_COPY_BLT_CMD; - break; - case 4: - CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; - break; - default: - unreachable("not reached"); - } - - if (dst_tiling != ISL_TILING_LINEAR) - CMD |= XY_DST_TILED; - - if (src_tiling != ISL_TILING_LINEAR) - CMD |= XY_SRC_TILED; - - return CMD; -} - -/* Copy BitBlt - */ -static bool -emit_copy_blit(struct brw_context *brw, - GLuint cpp, - int32_t src_pitch, - struct brw_bo *src_buffer, - GLuint src_offset, - enum isl_tiling src_tiling, - int32_t dst_pitch, - struct brw_bo *dst_buffer, - GLuint dst_offset, - enum isl_tiling dst_tiling, - GLshort src_x, GLshort src_y, - GLshort dst_x, GLshort dst_y, - GLshort w, GLshort h, - enum gl_logicop_mode logic_op) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - GLuint CMD, BR13; - int dst_y2 = dst_y + h; - int dst_x2 = dst_x + w; - bool dst_y_tiled = dst_tiling == ISL_TILING_Y0; - bool src_y_tiled = src_tiling == ISL_TILING_Y0; - uint32_t src_tile_w, src_tile_h; - uint32_t dst_tile_w, dst_tile_h; - - if ((dst_y_tiled || src_y_tiled) && devinfo->ver < 6) - return false; - - const unsigned bo_sizes = dst_buffer->size + src_buffer->size; - - /* do space check before going any further */ - if (!brw_batch_has_aperture_space(brw, bo_sizes)) - brw_batch_flush(brw); - - if (!brw_batch_has_aperture_space(brw, bo_sizes)) - return false; - - unsigned length = devinfo->ver >= 8 ? 10 : 8; - - brw_batch_require_space(brw, length * 4); - DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", - __func__, - src_buffer, src_pitch, src_offset, src_x, src_y, - dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); - - isl_get_tile_dims(src_tiling, cpp, &src_tile_w, &src_tile_h); - isl_get_tile_dims(dst_tiling, cpp, &dst_tile_w, &dst_tile_h); - - /* For Tiled surfaces, the pitch has to be a multiple of the Tile width - * (X direction width of the Tile). This is ensured while allocating the - * buffer object. - */ - assert(src_tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0); - assert(dst_tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0); - - /* For big formats (such as floating point), do the copy using 16 or - * 32bpp and multiply the coordinates. - */ - if (cpp > 4) { - if (cpp % 4 == 2) { - dst_x *= cpp / 2; - dst_x2 *= cpp / 2; - src_x *= cpp / 2; - cpp = 2; - } else { - assert(cpp % 4 == 0); - dst_x *= cpp / 4; - dst_x2 *= cpp / 4; - src_x *= cpp / 4; - cpp = 4; - } - } - - if (!alignment_valid(brw, dst_offset, dst_tiling)) - return false; - if (!alignment_valid(brw, src_offset, src_tiling)) - return false; - - /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop - * the low bits. Offsets must be naturally aligned. - */ - if (src_pitch % 4 != 0 || src_offset % cpp != 0 || - dst_pitch % 4 != 0 || dst_offset % cpp != 0) - return false; - - assert(cpp <= 4); - BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16; - - CMD = xy_blit_cmd(src_tiling, dst_tiling, cpp); - - /* For tiled source and destination, pitch value should be specified - * as a number of Dwords. 
- */ - if (dst_tiling != ISL_TILING_LINEAR) - dst_pitch /= 4; - - if (src_tiling != ISL_TILING_LINEAR) - src_pitch /= 4; - - if (dst_y2 <= dst_y || dst_x2 <= dst_x) - return true; - - assert(dst_x < dst_x2); - assert(dst_y < dst_y2); - - BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, src_y_tiled); - OUT_BATCH(CMD | (length - 2)); - OUT_BATCH(BR13 | (uint16_t)dst_pitch); - OUT_BATCH(SET_FIELD(dst_y, BLT_Y) | SET_FIELD(dst_x, BLT_X)); - OUT_BATCH(SET_FIELD(dst_y2, BLT_Y) | SET_FIELD(dst_x2, BLT_X)); - if (devinfo->ver >= 8) { - OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset); - } else { - OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset); - } - OUT_BATCH(SET_FIELD(src_y, BLT_Y) | SET_FIELD(src_x, BLT_X)); - OUT_BATCH((uint16_t)src_pitch); - if (devinfo->ver >= 8) { - OUT_RELOC64(src_buffer, 0, src_offset); - } else { - OUT_RELOC(src_buffer, 0, src_offset); - } - - ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled); - - brw_emit_mi_flush(brw); - - return true; -} - -static bool -emit_miptree_blit(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - uint32_t src_x, uint32_t src_y, - struct brw_mipmap_tree *dst_mt, - uint32_t dst_x, uint32_t dst_y, - uint32_t width, uint32_t height, - bool reverse, enum gl_logicop_mode logicop) -{ - /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics - * Data Size Limitations): - * - * The BLT engine is capable of transferring very large quantities of - * graphics data. Any graphics data read from and written to the - * destination is permitted to represent a number of pixels that - * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line - * at the destination. The maximum number of pixels that may be - * represented per scan line’s worth of graphics data depends on the - * color depth. - * - * The blitter's pitch is a signed 16-bit integer, but measured in bytes - * for linear surfaces and DWords for tiled surfaces. So the maximum - * pitch is 32k linear and 128k tiled. - */ - if (brw_miptree_blt_pitch(src_mt) >= 32768 || - brw_miptree_blt_pitch(dst_mt) >= 32768) { - perf_debug("Falling back due to >= 32k/128k pitch\n"); - return false; - } - - /* We need to split the blit into chunks that each fit within the blitter's - * restrictions. We can't use a chunk size of 32768 because we need to - * ensure that src_tile_x + chunk_size fits. We choose 16384 because it's - * a nice round power of two, big enough that performance won't suffer, and - * small enough to guarantee everything fits. - */ - const uint32_t max_chunk_size = 16384; - - for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) { - for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) { - const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x); - const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y); - - uint64_t src_offset; - uint32_t src_tile_x, src_tile_y; - get_blit_intratile_offset_el(brw, src_mt, - src_x + chunk_x, src_y + chunk_y, - &src_offset, &src_tile_x, &src_tile_y); - - uint64_t dst_offset; - uint32_t dst_tile_x, dst_tile_y; - get_blit_intratile_offset_el(brw, dst_mt, - dst_x + chunk_x, dst_y + chunk_y, - &dst_offset, &dst_tile_x, &dst_tile_y); - - if (!emit_copy_blit(brw, - src_mt->cpp, - reverse ? 
-src_mt->surf.row_pitch_B : - src_mt->surf.row_pitch_B, - src_mt->bo, src_mt->offset + src_offset, - src_mt->surf.tiling, - dst_mt->surf.row_pitch_B, - dst_mt->bo, dst_mt->offset + dst_offset, - dst_mt->surf.tiling, - src_tile_x, src_tile_y, - dst_tile_x, dst_tile_y, - chunk_w, chunk_h, - logicop)) { - /* If this is ever going to fail, it will fail on the first chunk */ - assert(chunk_x == 0 && chunk_y == 0); - return false; - } - } - } - - return true; -} - -/** - * Implements a rectangular block transfer (blit) of pixels between two - * miptrees. - * - * Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous, - * but limited, pitches and sizes allowed. - * - * The src/dst coordinates are relative to the given level/slice of the - * miptree. - * - * If @src_flip or @dst_flip is set, then the rectangle within that miptree - * will be inverted (including scanline order) when copying. This is common - * in GL when copying between window system and user-created - * renderbuffers/textures. - */ -bool -brw_miptree_blit(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - int src_level, int src_slice, - uint32_t src_x, uint32_t src_y, bool src_flip, - struct brw_mipmap_tree *dst_mt, - int dst_level, int dst_slice, - uint32_t dst_x, uint32_t dst_y, bool dst_flip, - uint32_t width, uint32_t height, - enum gl_logicop_mode logicop) -{ - /* The blitter doesn't understand multisampling at all. */ - if (src_mt->surf.samples > 1 || dst_mt->surf.samples > 1) - return false; - - /* No sRGB decode or encode is done by the hardware blitter, which is - * consistent with what we want in many callers (glCopyTexSubImage(), - * texture validation, etc.). - */ - mesa_format src_format = _mesa_get_srgb_format_linear(src_mt->format); - mesa_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format); - - /* The blitter doesn't support doing any format conversions. We do also - * support blitting ARGB8888 to XRGB8888 (trivial, the values dropped into - * the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A - * channel to 1.0 at the end. Also trivially ARGB2101010 to XRGB2101010, - * but not XRGB2101010 to ARGB2101010 yet. - */ - if (!brw_miptree_blit_compatible_formats(src_format, dst_format)) { - perf_debug("%s: Can't use hardware blitter from %s to %s, " - "falling back.\n", __func__, - _mesa_get_format_name(src_format), - _mesa_get_format_name(dst_format)); - return false; - } - - /* The blitter has no idea about HiZ or fast color clears, so we need to - * resolve the miptrees before we do anything. 
- */ - brw_miptree_access_raw(brw, src_mt, src_level, src_slice, false); - brw_miptree_access_raw(brw, dst_mt, dst_level, dst_slice, true); - - if (src_flip) { - const unsigned h0 = src_mt->surf.phys_level0_sa.height; - src_y = minify(h0, src_level - src_mt->first_level) - src_y - height; - } - - if (dst_flip) { - const unsigned h0 = dst_mt->surf.phys_level0_sa.height; - dst_y = minify(h0, dst_level - dst_mt->first_level) - dst_y - height; - } - - uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y; - brw_miptree_get_image_offset(src_mt, src_level, src_slice, - &src_image_x, &src_image_y); - brw_miptree_get_image_offset(dst_mt, dst_level, dst_slice, - &dst_image_x, &dst_image_y); - src_x += src_image_x; - src_y += src_image_y; - dst_x += dst_image_x; - dst_y += dst_image_y; - - if (!emit_miptree_blit(brw, src_mt, src_x, src_y, - dst_mt, dst_x, dst_y, width, height, - src_flip != dst_flip, logicop)) { - return false; - } - - /* XXX This could be done in a single pass using XY_FULL_MONO_PATTERN_BLT */ - if (_mesa_get_format_bits(src_format, GL_ALPHA_BITS) == 0 && - _mesa_get_format_bits(dst_format, GL_ALPHA_BITS) > 0) { - brw_miptree_set_alpha_to_one(brw, dst_mt, dst_x, dst_y, width, height); - } - - return true; -} - -bool -brw_miptree_copy(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - int src_level, int src_slice, - uint32_t src_x, uint32_t src_y, - struct brw_mipmap_tree *dst_mt, - int dst_level, int dst_slice, - uint32_t dst_x, uint32_t dst_y, - uint32_t src_width, uint32_t src_height) -{ - /* The blitter doesn't understand multisampling at all. */ - if (src_mt->surf.samples > 1 || dst_mt->surf.samples > 1) - return false; - - if (src_mt->format == MESA_FORMAT_S_UINT8) - return false; - - /* The blitter has no idea about HiZ or fast color clears, so we need to - * resolve the miptrees before we do anything. - */ - brw_miptree_access_raw(brw, src_mt, src_level, src_slice, false); - brw_miptree_access_raw(brw, dst_mt, dst_level, dst_slice, true); - - uint32_t src_image_x, src_image_y; - brw_miptree_get_image_offset(src_mt, src_level, src_slice, - &src_image_x, &src_image_y); - - if (_mesa_is_format_compressed(src_mt->format)) { - GLuint bw, bh; - _mesa_get_format_block_size(src_mt->format, &bw, &bh); - - /* Compressed textures need not have dimensions that are a multiple of - * the block size. Rectangles in compressed textures do need to be a - * multiple of the block size. The one exception is that the right and - * bottom edges may be at the right or bottom edge of the miplevel even - * if it's not aligned. 
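To make the block math below concrete, a worked example with hypothetical numbers, assuming a 4x4-block compressed format such as DXT1:

   /* A 10x6-pixel rectangle whose right/bottom edges coincide with the
    * miplevel edges: offsets must be block-aligned, sizes may round up. */
   const uint32_t bw = 4, bh = 4;
   uint32_t src_x = 8, src_y = 4;             /* block-aligned offsets      */
   uint32_t src_width = 10, src_height = 6;   /* not multiples of the block */
   src_x /= bw;                               /* -> 2 blocks */
   src_y /= bh;                               /* -> 1 block  */
   src_width = DIV_ROUND_UP(src_width, bw);   /* -> 3 blocks */
   src_height = DIV_ROUND_UP(src_height, bh); /* -> 2 blocks */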
- */ - assert(src_x % bw == 0); - assert(src_y % bh == 0); - - assert(src_width % bw == 0 || - src_x + src_width == - minify(src_mt->surf.logical_level0_px.width, src_level)); - assert(src_height % bh == 0 || - src_y + src_height == - minify(src_mt->surf.logical_level0_px.height, src_level)); - - src_x /= (int)bw; - src_y /= (int)bh; - src_width = DIV_ROUND_UP(src_width, (int)bw); - src_height = DIV_ROUND_UP(src_height, (int)bh); - } - src_x += src_image_x; - src_y += src_image_y; - - uint32_t dst_image_x, dst_image_y; - brw_miptree_get_image_offset(dst_mt, dst_level, dst_slice, - &dst_image_x, &dst_image_y); - - if (_mesa_is_format_compressed(dst_mt->format)) { - GLuint bw, bh; - _mesa_get_format_block_size(dst_mt->format, &bw, &bh); - - assert(dst_x % bw == 0); - assert(dst_y % bh == 0); - - dst_x /= (int)bw; - dst_y /= (int)bh; - } - dst_x += dst_image_x; - dst_y += dst_image_y; - - return emit_miptree_blit(brw, src_mt, src_x, src_y, - dst_mt, dst_x, dst_y, - src_width, src_height, false, COLOR_LOGICOP_COPY); -} - -bool -brw_emit_immediate_color_expand_blit(struct brw_context *brw, - GLuint cpp, - GLubyte *src_bits, GLuint src_size, - GLuint fg_color, - GLshort dst_pitch, - struct brw_bo *dst_buffer, - GLuint dst_offset, - enum isl_tiling dst_tiling, - GLshort x, GLshort y, - GLshort w, GLshort h, - enum gl_logicop_mode logic_op) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - int dwords = ALIGN(src_size, 8) / 4; - uint32_t opcode, br13, blit_cmd; - - if (dst_tiling != ISL_TILING_LINEAR) { - if (dst_offset & 4095) - return false; - if (dst_tiling == ISL_TILING_Y0) - return false; - } - - assert((unsigned) logic_op <= 0x0f); - assert(dst_pitch > 0); - - if (w < 0 || h < 0) - return true; - - DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n", - __func__, - dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); - - unsigned xy_setup_blt_length = devinfo->ver >= 8 ? 10 : 8; - brw_batch_require_space(brw, (xy_setup_blt_length * 4) + - (3 * 4) + dwords * 4); - - opcode = XY_SETUP_BLT_CMD; - if (cpp == 4) - opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; - if (dst_tiling != ISL_TILING_LINEAR) { - opcode |= XY_DST_TILED; - dst_pitch /= 4; - } - - br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29); - br13 |= br13_for_cpp(cpp); - - blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */ - if (dst_tiling != ISL_TILING_LINEAR) - blit_cmd |= XY_DST_TILED; - - BEGIN_BATCH_BLT(xy_setup_blt_length + 3); - OUT_BATCH(opcode | (xy_setup_blt_length - 2)); - OUT_BATCH(br13); - OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ - OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ - if (devinfo->ver >= 8) { - OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset); - } else { - OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset); - } - OUT_BATCH(0); /* bg */ - OUT_BATCH(fg_color); /* fg */ - OUT_BATCH(0); /* pattern base addr */ - if (devinfo->ver >= 8) - OUT_BATCH(0); - - OUT_BATCH(blit_cmd | ((3 - 2) + dwords)); - OUT_BATCH(SET_FIELD(y, BLT_Y) | SET_FIELD(x, BLT_X)); - OUT_BATCH(SET_FIELD(y + h, BLT_Y) | SET_FIELD(x + w, BLT_X)); - ADVANCE_BATCH(); - - brw_batch_data(brw, src_bits, dwords * 4); - - brw_emit_mi_flush(brw); - - return true; -} - -/** - * Used to initialize the alpha value of an ARGB8888 miptree after copying - * into it from an XRGB8888 source. - * - * This is very common with glCopyTexImage2D(). Note that the coordinates are - * relative to the start of the miptree, not relative to a slice within the - * miptree. 
- */ -static void -brw_miptree_set_alpha_to_one(struct brw_context *brw, - struct brw_mipmap_tree *mt, - int x, int y, int width, int height) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - uint32_t BR13, CMD; - int pitch, cpp; - - pitch = mt->surf.row_pitch_B; - cpp = mt->cpp; - - DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", - __func__, mt->bo, pitch, x, y, width, height); - - /* Note: Currently only handles 8 bit alpha channel. Extension to < 8 Bit - * alpha channel would be likely possible via ROP code 0xfa instead of 0xf0 - * and writing a suitable bit-mask instead of 0xffffffff. - */ - BR13 = br13_for_cpp(cpp) | 0xf0 << 16; - CMD = XY_COLOR_BLT_CMD; - CMD |= XY_BLT_WRITE_ALPHA; - - if (mt->surf.tiling != ISL_TILING_LINEAR) { - CMD |= XY_DST_TILED; - pitch /= 4; - } - BR13 |= pitch; - - /* do space check before going any further */ - if (!brw_batch_has_aperture_space(brw, mt->bo->size)) - brw_batch_flush(brw); - - unsigned length = devinfo->ver >= 8 ? 7 : 6; - const bool dst_y_tiled = mt->surf.tiling == ISL_TILING_Y0; - - /* We need to split the blit into chunks that each fit within the blitter's - * restrictions. We can't use a chunk size of 32768 because we need to - * ensure that src_tile_x + chunk_size fits. We choose 16384 because it's - * a nice round power of two, big enough that performance won't suffer, and - * small enough to guarantee everything fits. - */ - const uint32_t max_chunk_size = 16384; - - for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) { - for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) { - const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x); - const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y); - - uint64_t offset_B; - uint32_t tile_x, tile_y; - get_blit_intratile_offset_el(brw, mt, - x + chunk_x, y + chunk_y, - &offset_B, &tile_x, &tile_y); - - BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, false); - OUT_BATCH(CMD | (length - 2)); - OUT_BATCH(BR13); - OUT_BATCH(SET_FIELD(y + chunk_y, BLT_Y) | - SET_FIELD(x + chunk_x, BLT_X)); - OUT_BATCH(SET_FIELD(y + chunk_y + chunk_h, BLT_Y) | - SET_FIELD(x + chunk_x + chunk_w, BLT_X)); - if (devinfo->ver >= 8) { - OUT_RELOC64(mt->bo, RELOC_WRITE, mt->offset + offset_B); - } else { - OUT_RELOC(mt->bo, RELOC_WRITE, mt->offset + offset_B); - } - OUT_BATCH(0xffffffff); /* white, but only alpha gets written */ - ADVANCE_BATCH_TILED(dst_y_tiled, false); - } - } - - brw_emit_mi_flush(brw); -} diff --git a/src/mesa/drivers/dri/i965/brw_blit.h b/src/mesa/drivers/dri/i965/brw_blit.h deleted file mode 100644 index ab71420..0000000 --- a/src/mesa/drivers/dri/i965/brw_blit.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef BRW_BLIT_H -#define BRW_BLIT_H - -#include "brw_context.h" - -bool brw_miptree_blit_compatible_formats(mesa_format src, mesa_format dst); - -bool brw_miptree_blit(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - int src_level, int src_slice, - uint32_t src_x, uint32_t src_y, bool src_flip, - struct brw_mipmap_tree *dst_mt, - int dst_level, int dst_slice, - uint32_t dst_x, uint32_t dst_y, bool dst_flip, - uint32_t width, uint32_t height, - enum gl_logicop_mode logicop); - -bool brw_miptree_copy(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - int src_level, int src_slice, - uint32_t src_x, uint32_t src_y, - struct brw_mipmap_tree *dst_mt, - int dst_level, int dst_slice, - uint32_t dst_x, uint32_t dst_y, - uint32_t src_width, uint32_t src_height); - -bool -brw_emit_immediate_color_expand_blit(struct brw_context *brw, - GLuint cpp, - GLubyte *src_bits, GLuint src_size, - GLuint fg_color, - GLshort dst_pitch, - struct brw_bo *dst_buffer, - GLuint dst_offset, - enum isl_tiling dst_tiling, - GLshort x, GLshort y, - GLshort w, GLshort h, - enum gl_logicop_mode logic_op); - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c deleted file mode 100644 index 1b8ec47..0000000 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ /dev/null @@ -1,1678 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "main/context.h" -#include "main/teximage.h" -#include "main/blend.h" -#include "main/bufferobj.h" -#include "main/enums.h" -#include "main/fbobject.h" -#include "main/image.h" -#include "main/renderbuffer.h" -#include "main/glformats.h" - -#include "brw_blorp.h" -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_meta_util.h" -#include "brw_state.h" -#include "brw_buffer_objects.h" -#include "brw_fbo.h" -#include "dev/intel_debug.h" - -#define FILE_DEBUG_FLAG DEBUG_BLORP - -static bool -brw_blorp_lookup_shader(struct blorp_batch *batch, - const void *key, uint32_t key_size, - uint32_t *kernel_out, void *prog_data_out) -{ - struct brw_context *brw = batch->driver_batch; - return brw_search_cache(&brw->cache, BRW_CACHE_BLORP_PROG, key, key_size, - kernel_out, prog_data_out, true); -} - -static bool -brw_blorp_upload_shader(struct blorp_batch *batch, uint32_t stage, - const void *key, uint32_t key_size, - const void *kernel, uint32_t kernel_size, - const struct brw_stage_prog_data *prog_data, - uint32_t prog_data_size, - uint32_t *kernel_out, void *prog_data_out) -{ - struct brw_context *brw = batch->driver_batch; - brw_upload_cache(&brw->cache, BRW_CACHE_BLORP_PROG, key, key_size, - kernel, kernel_size, prog_data, prog_data_size, - kernel_out, prog_data_out); - return true; -} - -void -brw_blorp_init(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - blorp_init(&brw->blorp, brw, &brw->isl_dev, NULL); - - brw->blorp.compiler = brw->screen->compiler; - - switch (devinfo->ver) { - case 4: - if (devinfo->verx10 == 45) { - brw->blorp.exec = gfx45_blorp_exec; - } else { - brw->blorp.exec = gfx4_blorp_exec; - } - break; - case 5: - brw->blorp.exec = gfx5_blorp_exec; - break; - case 6: - brw->blorp.exec = gfx6_blorp_exec; - break; - case 7: - if (devinfo->verx10 == 75) { - brw->blorp.exec = gfx75_blorp_exec; - } else { - brw->blorp.exec = gfx7_blorp_exec; - } - break; - case 8: - brw->blorp.exec = gfx8_blorp_exec; - break; - case 9: - brw->blorp.exec = gfx9_blorp_exec; - break; - case 11: - brw->blorp.exec = gfx11_blorp_exec; - break; - - default: - unreachable("Invalid gen"); - } - - brw->blorp.lookup_shader = brw_blorp_lookup_shader; - brw->blorp.upload_shader = brw_blorp_upload_shader; -} - -static void -blorp_surf_for_miptree(struct brw_context *brw, - struct blorp_surf *surf, - const struct brw_mipmap_tree *mt, - enum isl_aux_usage aux_usage, - bool is_render_target, - unsigned *level, - unsigned start_layer, unsigned num_layers) -{ - if (mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY) { - const unsigned num_samples = mt->surf.samples; - for (unsigned i = 0; i < num_layers; i++) { - for (unsigned s = 0; s < num_samples; s++) { - const unsigned phys_layer = (start_layer + i) * num_samples + s; - brw_miptree_check_level_layer(mt, *level, phys_layer); - } - } - } else { - for (unsigned i = 0; i < num_layers; i++) - brw_miptree_check_level_layer(mt, *level, start_layer + i); - } - - *surf = (struct blorp_surf) { - .surf = &mt->surf, - .addr = (struct blorp_address) { - .buffer = mt->bo, - .offset = mt->offset, - .reloc_flags = is_render_target ? 
EXEC_OBJECT_WRITE : 0, - .mocs = brw_mocs(&brw->isl_dev, mt->bo), - }, - .aux_usage = aux_usage, - .tile_x_sa = mt->level[*level].level_x, - .tile_y_sa = mt->level[*level].level_y, - }; - - if (surf->aux_usage == ISL_AUX_USAGE_HIZ && - !brw_miptree_level_has_hiz(mt, *level)) - surf->aux_usage = ISL_AUX_USAGE_NONE; - - if (surf->aux_usage != ISL_AUX_USAGE_NONE) { - /* We only really need a clear color if we also have an auxiliary - * surface. Without one, it does nothing. - */ - surf->clear_color = - brw_miptree_get_clear_color(mt, (struct brw_bo **) - &surf->clear_color_addr.buffer, - &surf->clear_color_addr.offset); - - surf->aux_surf = &mt->aux_buf->surf; - surf->aux_addr = (struct blorp_address) { - .reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0, - .mocs = surf->addr.mocs, - }; - - surf->aux_addr.buffer = mt->aux_buf->bo; - surf->aux_addr.offset = mt->aux_buf->offset; - } else { - surf->aux_addr = (struct blorp_address) { - .buffer = NULL, - }; - memset(&surf->clear_color, 0, sizeof(surf->clear_color)); - } - assert((surf->aux_usage == ISL_AUX_USAGE_NONE) == - (surf->aux_addr.buffer == NULL)); - - if (!is_render_target && brw->screen->devinfo.ver == 9) - gfx9_apply_single_tex_astc5x5_wa(brw, mt->format, surf->aux_usage); - - /* ISL wants real levels, not offset ones. */ - *level -= mt->first_level; -} - -static bool -brw_blorp_supports_dst_format(struct brw_context *brw, mesa_format format) -{ - /* If it's renderable, it's definitely supported. */ - if (brw->mesa_format_supports_render[format]) - return true; - - /* BLORP can't compress anything */ - if (_mesa_is_format_compressed(format)) - return false; - - /* No exotic formats such as GL_LUMINANCE_ALPHA */ - if (_mesa_get_format_bits(format, GL_RED_BITS) == 0 && - _mesa_get_format_bits(format, GL_DEPTH_BITS) == 0 && - _mesa_get_format_bits(format, GL_STENCIL_BITS) == 0) - return false; - - return true; -} - -static enum isl_format -brw_blorp_to_isl_format(struct brw_context *brw, mesa_format format, - bool is_render_target) -{ - switch (format) { - case MESA_FORMAT_NONE: - return ISL_FORMAT_UNSUPPORTED; - case MESA_FORMAT_S_UINT8: - return ISL_FORMAT_R8_UINT; - case MESA_FORMAT_Z24_UNORM_X8_UINT: - case MESA_FORMAT_Z24_UNORM_S8_UINT: - return ISL_FORMAT_R24_UNORM_X8_TYPELESS; - case MESA_FORMAT_Z_FLOAT32: - case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: - return ISL_FORMAT_R32_FLOAT; - case MESA_FORMAT_Z_UNORM16: - return ISL_FORMAT_R16_UNORM; - default: - if (is_render_target) { - assert(brw_blorp_supports_dst_format(brw, format)); - if (brw->mesa_format_supports_render[format]) { - return brw->mesa_to_isl_render_format[format]; - } else { - return brw_isl_format_for_mesa_format(format); - } - } else { - /* Some destinations (is_render_target == true) are supported by - * blorp even though we technically can't render to them. - */ - return brw_isl_format_for_mesa_format(format); - } - } -} - -/** - * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gfx7.5+ - * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are - * - * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE - * 0 1 2 3 4 5 - * 4 5 6 7 0 1 - * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE - * - * which is simply adding 4 then modding by 8 (or anding with 7). - * - * We then may need to apply workarounds for textureGather hardware bugs. 
- */ -static enum isl_channel_select -swizzle_to_scs(GLenum swizzle) -{ - return (enum isl_channel_select)((swizzle + 4) & 7); -} - -/** - * Note: if the src (or dst) is a 2D multisample array texture on Gfx7+ using - * INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src_layer (dst_layer) is - * the physical layer holding sample 0. So, for example, if - * src_mt->surf.samples == 4, then logical layer n corresponds to src_layer == - * 4*n. - */ -void -brw_blorp_blit_miptrees(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - unsigned src_level, unsigned src_layer, - mesa_format src_format, int src_swizzle, - struct brw_mipmap_tree *dst_mt, - unsigned dst_level, unsigned dst_layer, - mesa_format dst_format, - float src_x0, float src_y0, - float src_x1, float src_y1, - float dst_x0, float dst_y0, - float dst_x1, float dst_y1, - GLenum gl_filter, bool mirror_x, bool mirror_y, - bool decode_srgb, bool encode_srgb) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - DBG("%s from %dx %s mt %p %d %d (%f,%f) (%f,%f) " - "to %dx %s mt %p %d %d (%f,%f) (%f,%f) (flip %d,%d)\n", - __func__, - src_mt->surf.samples, _mesa_get_format_name(src_mt->format), src_mt, - src_level, src_layer, src_x0, src_y0, src_x1, src_y1, - dst_mt->surf.samples, _mesa_get_format_name(dst_mt->format), dst_mt, - dst_level, dst_layer, dst_x0, dst_y0, dst_x1, dst_y1, - mirror_x, mirror_y); - - if (src_format == MESA_FORMAT_NONE) - src_format = src_mt->format; - - if (dst_format == MESA_FORMAT_NONE) - dst_format = dst_mt->format; - - if (!decode_srgb) - src_format = _mesa_get_srgb_format_linear(src_format); - - if (!encode_srgb) - dst_format = _mesa_get_srgb_format_linear(dst_format); - - /* When doing a multisample resolve of a GL_LUMINANCE32F or GL_INTENSITY32F - * texture, the above code configures the source format for L32_FLOAT or - * I32_FLOAT, and the destination format for R32_FLOAT. On Sandy Bridge, - * the SAMPLE message appears to handle multisampled L32_FLOAT and - * I32_FLOAT textures incorrectly, resulting in blocky artifacts. So work - * around the problem by using a source format of R32_FLOAT. This - * shouldn't affect rendering correctness, since the destination format is - * R32_FLOAT, so only the contents of the red channel matters. - */ - if (devinfo->ver == 6 && - src_mt->surf.samples > 1 && dst_mt->surf.samples <= 1 && - src_mt->format == dst_mt->format && - (dst_format == MESA_FORMAT_L_FLOAT32 || - dst_format == MESA_FORMAT_I_FLOAT32)) { - src_format = dst_format = MESA_FORMAT_R_FLOAT32; - } - - enum blorp_filter blorp_filter; - if (fabsf(dst_x1 - dst_x0) == fabsf(src_x1 - src_x0) && - fabsf(dst_y1 - dst_y0) == fabsf(src_y1 - src_y0)) { - if (src_mt->surf.samples > 1 && dst_mt->surf.samples <= 1) { - /* From the OpenGL ES 3.2 specification, section 16.2.1: - * - * "If the read framebuffer is multisampled (its effective value - * of SAMPLE_BUFFERS is one) and the draw framebuffer is not (its - * value of SAMPLE_BUFFERS is zero), the samples corresponding to - * each pixel location in the source are converted to a single - * sample before being written to the destination. The filter - * parameter is ignored. If the source formats are integer types - * or stencil values, a single sample’s value is selected for each - * pixel. If the source formats are floating-point or normalized - * types, the sample values for each pixel are resolved in an - * implementation-dependent manner. 
If the source formats are - * depth values, sample values are resolved in an implementation- - * dependent manner where the result will be between the minimum - * and maximum depth values in the pixel." - * - * For depth and stencil resolves, we choose to always use the value - * at sample 0. - */ - GLenum base_format = _mesa_get_format_base_format(src_mt->format); - if (base_format == GL_DEPTH_COMPONENT || - base_format == GL_STENCIL_INDEX || - base_format == GL_DEPTH_STENCIL || - _mesa_is_format_integer(src_mt->format)) { - /* The OpenGL ES 3.2 spec says: - * - * "If the source formats are integer types or stencil values, - * a single sample's value is selected for each pixel." - * - * Just take sample 0 in this case. - */ - blorp_filter = BLORP_FILTER_SAMPLE_0; - } else { - blorp_filter = BLORP_FILTER_AVERAGE; - } - } else { - /* From the OpenGL 4.6 specification, section 18.3.1: - * - * "If the source and destination dimensions are identical, no - * filtering is applied." - * - * Using BLORP_FILTER_NONE will also handle the upsample case by - * replicating the one value in the source to all values in the - * destination. - */ - blorp_filter = BLORP_FILTER_NONE; - } - } else if (gl_filter == GL_LINEAR || - gl_filter == GL_SCALED_RESOLVE_FASTEST_EXT || - gl_filter == GL_SCALED_RESOLVE_NICEST_EXT) { - blorp_filter = BLORP_FILTER_BILINEAR; - } else { - blorp_filter = BLORP_FILTER_NEAREST; - } - - enum isl_format src_isl_format = - brw_blorp_to_isl_format(brw, src_format, false); - enum isl_aux_usage src_aux_usage = - brw_miptree_texture_aux_usage(brw, src_mt, src_isl_format, - 0 /* The astc5x5 WA isn't needed */); - /* We do format workarounds for some depth formats so we can't reliably - * sample with HiZ. One of these days, we should fix that. - */ - if (src_aux_usage == ISL_AUX_USAGE_HIZ && src_mt->format != src_format) - src_aux_usage = ISL_AUX_USAGE_NONE; - const bool src_clear_supported = - src_aux_usage != ISL_AUX_USAGE_NONE && src_mt->format == src_format; - brw_miptree_prepare_access(brw, src_mt, src_level, 1, src_layer, 1, - src_aux_usage, src_clear_supported); - - enum isl_format dst_isl_format = - brw_blorp_to_isl_format(brw, dst_format, true); - enum isl_aux_usage dst_aux_usage = - brw_miptree_render_aux_usage(brw, dst_mt, dst_isl_format, false, false); - const bool dst_clear_supported = dst_aux_usage != ISL_AUX_USAGE_NONE; - brw_miptree_prepare_access(brw, dst_mt, dst_level, 1, dst_layer, 1, - dst_aux_usage, dst_clear_supported); - - struct blorp_surf src_surf, dst_surf; - blorp_surf_for_miptree(brw, &src_surf, src_mt, src_aux_usage, false, - &src_level, src_layer, 1); - blorp_surf_for_miptree(brw, &dst_surf, dst_mt, dst_aux_usage, true, - &dst_level, dst_layer, 1); - - struct isl_swizzle src_isl_swizzle = { - .r = swizzle_to_scs(GET_SWZ(src_swizzle, 0)), - .g = swizzle_to_scs(GET_SWZ(src_swizzle, 1)), - .b = swizzle_to_scs(GET_SWZ(src_swizzle, 2)), - .a = swizzle_to_scs(GET_SWZ(src_swizzle, 3)), - }; - - struct blorp_batch batch; - blorp_batch_init(&brw->blorp, &batch, brw, 0); - blorp_blit(&batch, &src_surf, src_level, src_layer, - src_isl_format, src_isl_swizzle, - &dst_surf, dst_level, dst_layer, - dst_isl_format, ISL_SWIZZLE_IDENTITY, - src_x0, src_y0, src_x1, src_y1, - dst_x0, dst_y0, dst_x1, dst_y1, - blorp_filter, mirror_x, mirror_y); - blorp_batch_finish(&batch); - - brw_miptree_finish_write(brw, dst_mt, dst_level, dst_layer, 1, - dst_aux_usage); -} - -void -brw_blorp_copy_miptrees(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - unsigned src_level, 
unsigned src_layer,
-                        struct brw_mipmap_tree *dst_mt,
-                        unsigned dst_level, unsigned dst_layer,
-                        unsigned src_x, unsigned src_y,
-                        unsigned dst_x, unsigned dst_y,
-                        unsigned src_width, unsigned src_height)
-{
-   DBG("%s from %dx %s mt %p %d %d (%d,%d) %dx%d "
-       "to %dx %s mt %p %d %d (%d,%d)\n",
-       __func__,
-       src_mt->surf.samples, _mesa_get_format_name(src_mt->format), src_mt,
-       src_level, src_layer, src_x, src_y, src_width, src_height,
-       dst_mt->surf.samples, _mesa_get_format_name(dst_mt->format), dst_mt,
-       dst_level, dst_layer, dst_x, dst_y);
-
-   enum isl_aux_usage src_aux_usage, dst_aux_usage;
-   bool src_clear_supported, dst_clear_supported;
-
-   switch (src_mt->aux_usage) {
-   case ISL_AUX_USAGE_HIZ:
-      if (brw_miptree_sample_with_hiz(brw, src_mt)) {
-         src_aux_usage = src_mt->aux_usage;
-         src_clear_supported = true;
-      } else {
-         src_aux_usage = ISL_AUX_USAGE_NONE;
-         src_clear_supported = false;
-      }
-      break;
-   case ISL_AUX_USAGE_MCS:
-   case ISL_AUX_USAGE_CCS_E:
-      src_aux_usage = src_mt->aux_usage;
-      src_clear_supported = false;
-      break;
-   default:
-      src_aux_usage = ISL_AUX_USAGE_NONE;
-      src_clear_supported = false;
-      break;
-   }
-
-   switch (dst_mt->aux_usage) {
-   case ISL_AUX_USAGE_MCS:
-   case ISL_AUX_USAGE_CCS_E:
-      dst_aux_usage = dst_mt->aux_usage;
-      dst_clear_supported = false;
-      break;
-   default:
-      dst_aux_usage = ISL_AUX_USAGE_NONE;
-      dst_clear_supported = false;
-      break;
-   }
-
-   brw_miptree_prepare_access(brw, src_mt, src_level, 1, src_layer, 1,
-                              src_aux_usage, src_clear_supported);
-   brw_miptree_prepare_access(brw, dst_mt, dst_level, 1, dst_layer, 1,
-                              dst_aux_usage, dst_clear_supported);
-
-   struct blorp_surf src_surf, dst_surf;
-   blorp_surf_for_miptree(brw, &src_surf, src_mt, src_aux_usage, false,
-                          &src_level, src_layer, 1);
-   blorp_surf_for_miptree(brw, &dst_surf, dst_mt, dst_aux_usage, true,
-                          &dst_level, dst_layer, 1);
-
-   /* The hardware seems to have issues with having two different-format
-    * views of the same texture in the sampler cache at the same time.  It's
-    * unclear exactly what the issue is, but it hurts glCopyImageSubData
-    * particularly badly because it does a lot of format reinterprets.  We
-    * badly need a better understanding of the issue and a better fix, but
-    * this works for now and fixes CTS tests.
-    *
-    * TODO: Remove this hack!
- */ - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); - - struct blorp_batch batch; - blorp_batch_init(&brw->blorp, &batch, brw, 0); - blorp_copy(&batch, &src_surf, src_level, src_layer, - &dst_surf, dst_level, dst_layer, - src_x, src_y, dst_x, dst_y, src_width, src_height); - blorp_batch_finish(&batch); - - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); - - brw_miptree_finish_write(brw, dst_mt, dst_level, dst_layer, 1, - dst_aux_usage); -} - -void -brw_blorp_copy_buffers(struct brw_context *brw, - struct brw_bo *src_bo, - unsigned src_offset, - struct brw_bo *dst_bo, - unsigned dst_offset, - unsigned size) -{ - DBG("%s %d bytes from %p[%d] to %p[%d]", - __func__, size, src_bo, src_offset, dst_bo, dst_offset); - - struct blorp_batch batch; - struct blorp_address src = { - .buffer = src_bo, .offset = src_offset, - .mocs = brw_mocs(&brw->isl_dev, src_bo), - }; - struct blorp_address dst = { - .buffer = dst_bo, .offset = dst_offset, - .mocs = brw_mocs(&brw->isl_dev, dst_bo), - }; - - blorp_batch_init(&brw->blorp, &batch, brw, 0); - blorp_buffer_copy(&batch, src, dst, size); - blorp_batch_finish(&batch); -} - - -static struct brw_mipmap_tree * -find_miptree(GLbitfield buffer_bit, struct brw_renderbuffer *irb) -{ - struct brw_mipmap_tree *mt = irb->mt; - if (buffer_bit == GL_STENCIL_BUFFER_BIT && mt->stencil_mt) - mt = mt->stencil_mt; - return mt; -} - -static int -blorp_get_texture_swizzle(const struct brw_renderbuffer *irb) -{ - return irb->Base.Base._BaseFormat == GL_RGB ? - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE) : - SWIZZLE_XYZW; -} - -static void -do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit, - struct brw_renderbuffer *src_irb, mesa_format src_format, - struct brw_renderbuffer *dst_irb, mesa_format dst_format, - GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1, - GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1, - GLenum filter, bool mirror_x, bool mirror_y) -{ - const struct gl_context *ctx = &brw->ctx; - - /* Find source/dst miptrees */ - struct brw_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb); - struct brw_mipmap_tree *dst_mt = find_miptree(buffer_bit, dst_irb); - - const bool do_srgb = ctx->Color.sRGBEnabled; - - /* Do the blit */ - brw_blorp_blit_miptrees(brw, - src_mt, src_irb->mt_level, src_irb->mt_layer, - src_format, blorp_get_texture_swizzle(src_irb), - dst_mt, dst_irb->mt_level, dst_irb->mt_layer, - dst_format, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - filter, mirror_x, mirror_y, - do_srgb, do_srgb); - - dst_irb->need_downsample = true; -} - -static bool -try_blorp_blit(struct brw_context *brw, - const struct gl_framebuffer *read_fb, - const struct gl_framebuffer *draw_fb, - GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1, - GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1, - GLenum filter, GLbitfield buffer_bit) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - - /* Sync up the state of window system buffers. We need to do this before - * we go looking for the buffers. 
- */ - brw_prepare_render(brw); - - bool mirror_x, mirror_y; - if (brw_meta_mirror_clip_and_scissor(ctx, read_fb, draw_fb, - &srcX0, &srcY0, &srcX1, &srcY1, - &dstX0, &dstY0, &dstX1, &dstY1, - &mirror_x, &mirror_y)) - return true; - - /* Find buffers */ - struct brw_renderbuffer *src_irb; - struct brw_renderbuffer *dst_irb; - struct brw_mipmap_tree *src_mt; - struct brw_mipmap_tree *dst_mt; - switch (buffer_bit) { - case GL_COLOR_BUFFER_BIT: - src_irb = brw_renderbuffer(read_fb->_ColorReadBuffer); - for (unsigned i = 0; i < draw_fb->_NumColorDrawBuffers; ++i) { - dst_irb = brw_renderbuffer(draw_fb->_ColorDrawBuffers[i]); - if (dst_irb) - do_blorp_blit(brw, buffer_bit, - src_irb, src_irb->Base.Base.Format, - dst_irb, dst_irb->Base.Base.Format, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - filter, mirror_x, mirror_y); - } - break; - case GL_DEPTH_BUFFER_BIT: - src_irb = - brw_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer); - dst_irb = - brw_renderbuffer(draw_fb->Attachment[BUFFER_DEPTH].Renderbuffer); - src_mt = find_miptree(buffer_bit, src_irb); - dst_mt = find_miptree(buffer_bit, dst_irb); - - /* We also can't handle any combined depth-stencil formats because we - * have to reinterpret as a color format. - */ - if (_mesa_get_format_base_format(src_mt->format) == GL_DEPTH_STENCIL || - _mesa_get_format_base_format(dst_mt->format) == GL_DEPTH_STENCIL) - return false; - - do_blorp_blit(brw, buffer_bit, src_irb, MESA_FORMAT_NONE, - dst_irb, MESA_FORMAT_NONE, srcX0, srcY0, - srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, - filter, mirror_x, mirror_y); - break; - case GL_STENCIL_BUFFER_BIT: - /* Blorp doesn't support combined depth stencil which is all we have - * prior to gfx6. - */ - if (devinfo->ver < 6) - return false; - - src_irb = - brw_renderbuffer(read_fb->Attachment[BUFFER_STENCIL].Renderbuffer); - dst_irb = - brw_renderbuffer(draw_fb->Attachment[BUFFER_STENCIL].Renderbuffer); - do_blorp_blit(brw, buffer_bit, src_irb, MESA_FORMAT_NONE, - dst_irb, MESA_FORMAT_NONE, srcX0, srcY0, - srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, - filter, mirror_x, mirror_y); - break; - default: - unreachable("not reached"); - } - - return true; -} - -static void -apply_y_flip(int *y0, int *y1, int height) -{ - int tmp = height - *y0; - *y0 = height - *y1; - *y1 = tmp; -} - -bool -brw_blorp_copytexsubimage(struct brw_context *brw, - struct gl_renderbuffer *src_rb, - struct gl_texture_image *dst_image, - int slice, - int srcX0, int srcY0, - int dstX0, int dstY0, - int width, int height) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_renderbuffer *src_irb = brw_renderbuffer(src_rb); - struct brw_texture_image *intel_image = brw_texture_image(dst_image); - - /* No pixel transfer operations (zoom, bias, mapping), just a blit */ - if (brw->ctx._ImageTransferState) - return false; - - /* Sync up the state of window system buffers. We need to do this before - * we go looking at the src renderbuffer's miptree. - */ - brw_prepare_render(brw); - - struct brw_mipmap_tree *src_mt = src_irb->mt; - struct brw_mipmap_tree *dst_mt = intel_image->mt; - - /* We can't handle any combined depth-stencil formats because we have to - * reinterpret as a color format. 
- */ - if (_mesa_get_format_base_format(src_mt->format) == GL_DEPTH_STENCIL || - _mesa_get_format_base_format(dst_mt->format) == GL_DEPTH_STENCIL) - return false; - - if (!brw_blorp_supports_dst_format(brw, dst_image->TexFormat)) - return false; - - /* Source clipping shouldn't be necessary, since copytexsubimage (in - * src/mesa/main/teximage.c) calls _mesa_clip_copytexsubimage() which - * takes care of it. - * - * Destination clipping shouldn't be necessary since the restrictions on - * glCopyTexSubImage prevent the user from specifying a destination rectangle - * that falls outside the bounds of the destination texture. - * See error_check_subtexture_dimensions(). - */ - - int srcY1 = srcY0 + height; - int srcX1 = srcX0 + width; - int dstX1 = dstX0 + width; - int dstY1 = dstY0 + height; - - /* Account for the fact that in the system framebuffer, the origin is at - * the lower left. - */ - bool mirror_y = ctx->ReadBuffer->FlipY; - if (mirror_y) - apply_y_flip(&srcY0, &srcY1, src_rb->Height); - - /* Account for face selection and texture view MinLayer */ - int dst_slice = slice + dst_image->TexObject->Attrib.MinLayer + dst_image->Face; - int dst_level = dst_image->Level + dst_image->TexObject->Attrib.MinLevel; - - brw_blorp_blit_miptrees(brw, - src_mt, src_irb->mt_level, src_irb->mt_layer, - src_rb->Format, blorp_get_texture_swizzle(src_irb), - dst_mt, dst_level, dst_slice, - dst_image->TexFormat, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - GL_NEAREST, false, mirror_y, - false, false); - - /* If we're copying to a packed depth stencil texture and the source - * framebuffer has separate stencil, we need to also copy the stencil data - * over. - */ - src_rb = ctx->ReadBuffer->Attachment[BUFFER_STENCIL].Renderbuffer; - if (_mesa_get_format_bits(dst_image->TexFormat, GL_STENCIL_BITS) > 0 && - src_rb != NULL) { - src_irb = brw_renderbuffer(src_rb); - src_mt = src_irb->mt; - - if (src_mt->stencil_mt) - src_mt = src_mt->stencil_mt; - if (dst_mt->stencil_mt) - dst_mt = dst_mt->stencil_mt; - - if (src_mt != dst_mt) { - brw_blorp_blit_miptrees(brw, - src_mt, src_irb->mt_level, src_irb->mt_layer, - src_mt->format, - blorp_get_texture_swizzle(src_irb), - dst_mt, dst_level, dst_slice, - dst_mt->format, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - GL_NEAREST, false, mirror_y, - false, false); - } - } - - return true; -} - - -GLbitfield -brw_blorp_framebuffer(struct brw_context *brw, - struct gl_framebuffer *readFb, - struct gl_framebuffer *drawFb, - GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, - GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, - GLbitfield mask, GLenum filter) -{ - static GLbitfield buffer_bits[] = { - GL_COLOR_BUFFER_BIT, - GL_DEPTH_BUFFER_BIT, - GL_STENCIL_BUFFER_BIT, - }; - - for (unsigned int i = 0; i < ARRAY_SIZE(buffer_bits); ++i) { - if ((mask & buffer_bits[i]) && - try_blorp_blit(brw, readFb, drawFb, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - filter, buffer_bits[i])) { - mask &= ~buffer_bits[i]; - } - } - - /* try_blorp_blit should always be successful for color blits. 
*/ - assert(!(mask & GL_COLOR_BUFFER_BIT)); - return mask; -} - -static struct brw_bo * -blorp_get_client_bo(struct brw_context *brw, - unsigned w, unsigned h, unsigned d, - GLenum target, GLenum format, GLenum type, - const void *pixels, - const struct gl_pixelstore_attrib *packing, - uint32_t *offset_out, uint32_t *row_stride_out, - uint32_t *image_stride_out, bool read_only) -{ - /* Account for SKIP_PIXELS, SKIP_ROWS, ALIGNMENT, and SKIP_IMAGES */ - const GLuint dims = _mesa_get_texture_dimensions(target); - const uint32_t first_pixel = _mesa_image_offset(dims, packing, w, h, - format, type, 0, 0, 0); - const uint32_t last_pixel = _mesa_image_offset(dims, packing, w, h, - format, type, - d - 1, h - 1, w); - const uint32_t stride = _mesa_image_row_stride(packing, w, format, type); - const uint32_t size = last_pixel - first_pixel; - - *row_stride_out = stride; - *image_stride_out = _mesa_image_image_stride(packing, w, h, format, type); - - if (packing->BufferObj) { - const uint32_t offset = first_pixel + (intptr_t)pixels; - - if (!read_only) { - const int32_t cpp = _mesa_bytes_per_pixel(format, type); - assert(cpp > 0); - - if ((offset % cpp) || (stride % cpp)) { - perf_debug("Bad PBO alignment; fallback to CPU mapping\n"); - return NULL; - } - } - - /* This is a user-provided PBO. We just need to get the BO out */ - struct brw_buffer_object *intel_pbo = - brw_buffer_object(packing->BufferObj); - struct brw_bo *bo = - brw_bufferobj_buffer(brw, intel_pbo, offset, size, !read_only); - - /* We take a reference to the BO so that the caller can just always - * unref without having to worry about whether it's a user PBO or one - * we created. - */ - brw_bo_reference(bo); - - *offset_out = offset; - return bo; - } else { - /* Someone should have already checked that there is data to upload. */ - assert(pixels); - - /* Creating a temp buffer currently only works for upload */ - assert(read_only); - - /* This is not a user-provided PBO. Instead, pixels is a pointer to CPU - * data which we need to copy into a BO. - */ - struct brw_bo *bo = - brw_bo_alloc(brw->bufmgr, "tmp_tex_subimage_src", size, - BRW_MEMZONE_OTHER); - if (bo == NULL) { - perf_debug("%s: temp bo creation failed: size = %u\n", __func__, - size); - return NULL; - } - - if (brw_bo_subdata(bo, 0, size, pixels + first_pixel)) { - perf_debug("%s: temp bo upload failed\n", __func__); - brw_bo_unreference(bo); - return NULL; - } - - *offset_out = 0; - return bo; - } -} - -/* Consider all the restrictions and determine the format of the source. 
*/ -static mesa_format -blorp_get_client_format(struct brw_context *brw, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *packing) -{ - if (brw->ctx._ImageTransferState) - return MESA_FORMAT_NONE; - - if (packing->SwapBytes || packing->LsbFirst || packing->Invert) { - perf_debug("%s: unsupported gl_pixelstore_attrib\n", __func__); - return MESA_FORMAT_NONE; - } - - if (format != GL_RED && - format != GL_RG && - format != GL_RGB && - format != GL_BGR && - format != GL_RGBA && - format != GL_BGRA && - format != GL_ALPHA && - format != GL_RED_INTEGER && - format != GL_RG_INTEGER && - format != GL_RGB_INTEGER && - format != GL_BGR_INTEGER && - format != GL_RGBA_INTEGER && - format != GL_BGRA_INTEGER) { - perf_debug("%s: %s not supported", __func__, - _mesa_enum_to_string(format)); - return MESA_FORMAT_NONE; - } - - return _mesa_tex_format_from_format_and_type(&brw->ctx, format, type); -} - -bool -brw_blorp_upload_miptree(struct brw_context *brw, - struct brw_mipmap_tree *dst_mt, - mesa_format dst_format, - uint32_t level, uint32_t x, uint32_t y, uint32_t z, - uint32_t width, uint32_t height, uint32_t depth, - GLenum target, GLenum format, GLenum type, - const void *pixels, - const struct gl_pixelstore_attrib *packing) -{ - const mesa_format src_format = - blorp_get_client_format(brw, format, type, packing); - if (src_format == MESA_FORMAT_NONE) - return false; - - if (!brw->mesa_format_supports_render[dst_format]) { - perf_debug("%s: can't use %s as render target\n", __func__, - _mesa_get_format_name(dst_format)); - return false; - } - - uint32_t src_offset, src_row_stride, src_image_stride; - struct brw_bo *src_bo = - blorp_get_client_bo(brw, width, height, depth, - target, format, type, pixels, packing, - &src_offset, &src_row_stride, - &src_image_stride, true); - if (src_bo == NULL) - return false; - - /* Now that source is offset to correct starting point, adjust the - * given dimensions to treat 1D arrays as 2D. - */ - if (target == GL_TEXTURE_1D_ARRAY) { - assert(depth == 1); - assert(z == 0); - depth = height; - height = 1; - z = y; - y = 0; - src_image_stride = src_row_stride; - } - - brw_miptree_check_level_layer(dst_mt, level, z + depth - 1); - - bool result = false; - - /* Blit slice-by-slice creating a single-slice miptree for each layer. Even - * in case of linear buffers hardware wants image arrays to be aligned by - * four rows. This way hardware only gets one image at a time and any - * source alignment will do. - */ - for (unsigned i = 0; i < depth; ++i) { - struct brw_mipmap_tree *src_mt = - brw_miptree_create_for_bo(brw, src_bo, src_format, - src_offset + i * src_image_stride, - width, height, 1, - src_row_stride, - ISL_TILING_LINEAR, 0); - - if (!src_mt) { - perf_debug("%s: miptree creation for src failed\n", __func__); - goto err; - } - - /* In case exact match is needed, copy using equivalent UINT formats - * preventing hardware from changing presentation for SNORM -1. 
-       */
-      if (src_mt->format == dst_format) {
-         brw_blorp_copy_miptrees(brw, src_mt, 0, 0,
-                                 dst_mt, level, z + i,
-                                 0, 0, x, y, width, height);
-      } else {
-         brw_blorp_blit_miptrees(brw, src_mt, 0, 0,
-                                 src_format, SWIZZLE_XYZW,
-                                 dst_mt, level, z + i,
-                                 dst_format,
-                                 0, 0, width, height,
-                                 x, y, x + width, y + height,
-                                 GL_NEAREST, false, false, false, false);
-      }
-
-      brw_miptree_release(&src_mt);
-   }
-
-   result = true;
-
-err:
-   brw_bo_unreference(src_bo);
-
-   return result;
-}
-
-bool
-brw_blorp_download_miptree(struct brw_context *brw,
-                           struct brw_mipmap_tree *src_mt,
-                           mesa_format src_format, uint32_t src_swizzle,
-                           uint32_t level, uint32_t x, uint32_t y, uint32_t z,
-                           uint32_t width, uint32_t height, uint32_t depth,
-                           GLenum target, GLenum format, GLenum type,
-                           bool y_flip, const void *pixels,
-                           const struct gl_pixelstore_attrib *packing)
-{
-   const mesa_format dst_format =
-      blorp_get_client_format(brw, format, type, packing);
-   if (dst_format == MESA_FORMAT_NONE)
-      return false;
-
-   if (!brw->mesa_format_supports_render[dst_format]) {
-      perf_debug("%s: can't use %s as render target\n", __func__,
-                 _mesa_get_format_name(dst_format));
-      return false;
-   }
-
-   /* We can't fetch from LUMINANCE or INTENSITY, as that would require a
-    * non-trivial swizzle.
-    */
-   switch (_mesa_get_format_base_format(src_format)) {
-   case GL_LUMINANCE:
-   case GL_LUMINANCE_ALPHA:
-   case GL_INTENSITY:
-      return false;
-   default:
-      break;
-   }
-
-   /* This pass only works for PBOs */
-   assert(packing->BufferObj);
-
-   uint32_t dst_offset, dst_row_stride, dst_image_stride;
-   struct brw_bo *dst_bo =
-      blorp_get_client_bo(brw, width, height, depth,
-                          target, format, type, pixels, packing,
-                          &dst_offset, &dst_row_stride,
-                          &dst_image_stride, false);
-   if (dst_bo == NULL)
-      return false;
-
-   /* Now that source is offset to correct starting point, adjust the
-    * given dimensions to treat 1D arrays as 2D.
-    */
-   if (target == GL_TEXTURE_1D_ARRAY) {
-      assert(depth == 1);
-      assert(z == 0);
-      depth = height;
-      height = 1;
-      z = y;
-      y = 0;
-      dst_image_stride = dst_row_stride;
-   }
-
-   brw_miptree_check_level_layer(src_mt, level, z + depth - 1);
-
-   int y0 = y;
-   int y1 = y + height;
-   if (y_flip) {
-      apply_y_flip(&y0, &y1, minify(src_mt->surf.phys_level0_sa.height,
-                                    level - src_mt->first_level));
-   }
-
-   bool result = false;
-
-   /* Blit slice-by-slice creating a single-slice miptree for each layer. Even
-    * in case of linear buffers hardware wants image arrays to be aligned by
-    * four rows. This way hardware only gets one image at a time and any
-    * source alignment will do.
-    */
-   for (unsigned i = 0; i < depth; ++i) {
-      struct brw_mipmap_tree *dst_mt =
-         brw_miptree_create_for_bo(brw, dst_bo, dst_format,
-                                   dst_offset + i * dst_image_stride,
-                                   width, height, 1,
-                                   dst_row_stride,
-                                   ISL_TILING_LINEAR, 0);
-
-      if (!dst_mt) {
-         perf_debug("%s: miptree creation for dst failed\n", __func__);
-         goto err;
-      }
-
-      /* In case exact match is needed, copy using equivalent UINT formats
-       * preventing hardware from changing presentation for SNORM -1.
-       */
-      if (dst_mt->format == src_format && !y_flip &&
-          src_swizzle == SWIZZLE_XYZW) {
-         brw_blorp_copy_miptrees(brw, src_mt, level, z + i,
-                                 dst_mt, 0, 0,
-                                 x, y, 0, 0, width, height);
-      } else {
-         brw_blorp_blit_miptrees(brw, src_mt, level, z + i,
-                                 src_format, src_swizzle,
-                                 dst_mt, 0, 0, dst_format,
-                                 x, y0, x + width, y1,
-                                 0, 0, width, height,
-                                 GL_NEAREST, false, y_flip, false, false);
-      }
-
-      brw_miptree_release(&dst_mt);
-   }
-
-   result = true;
-
-   /* We implement PBO transfers by binding the user-provided BO as a fake
-    * framebuffer and rendering to it.  This breaks the invariant of the
-    * GL that nothing is able to render to a BO, causing nondeterministic
-    * corruption issues because the render cache is not coherent with a
-    * number of other caches that the BO could potentially be bound to
-    * afterwards.
-    *
-    * This could be solved in the same way that we guarantee texture
-    * coherency after a texture is attached to a framebuffer and
-    * rendered to, but that would involve checking *all* BOs bound to
-    * the pipeline for the case we need to emit a cache flush due to
-    * previous rendering to any of them -- including vertex, index,
-    * uniform, atomic counter, shader image, transform feedback,
-    * indirect draw buffers, etc.
-    *
-    * That would increase the per-draw call overhead even though it's
-    * very unlikely that any of the BOs bound to the pipeline has been
-    * rendered to via a PBO at any point, so it seems better to just
-    * flush here unconditionally.
-    */
-   brw_emit_mi_flush(brw);
-
-err:
-   brw_bo_unreference(dst_bo);
-
-   return result;
-}
-
-static bool
-set_write_disables(const struct brw_renderbuffer *irb,
-                   const unsigned color_mask, uint8_t *color_write_disable)
-{
-   /* Format information in the renderbuffer represents the requirements
-    * given by the client.  There are cases where the backing miptree uses,
-    * for example, RGBA to represent RGBX.  Since the client is only expecting
-    * RGB we can treat alpha as not used and write whatever we like into it.
-    */
-   const GLenum base_format = irb->Base.Base._BaseFormat;
-   const int components = _mesa_components_in_format(base_format);
-   assert(components > 0);
-   *color_write_disable = ~color_mask & BITFIELD_MASK(components);
-   return *color_write_disable;
-}
-
-static void
-do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
-                      struct gl_renderbuffer *rb, unsigned buf,
-                      bool partial_clear, bool encode_srgb)
-{
-   struct gl_context *ctx = &brw->ctx;
-   struct brw_renderbuffer *irb = brw_renderbuffer(rb);
-   uint32_t x0, x1, y0, y1;
-
-   mesa_format format = irb->Base.Base.Format;
-   if (!encode_srgb)
-      format = _mesa_get_srgb_format_linear(format);
-   enum isl_format isl_format = brw->mesa_to_isl_render_format[format];
-
-   x0 = fb->_Xmin;
-   x1 = fb->_Xmax;
-   if (fb->FlipY) {
-      y0 = rb->Height - fb->_Ymax;
-      y1 = rb->Height - fb->_Ymin;
-   } else {
-      y0 = fb->_Ymin;
-      y1 = fb->_Ymax;
-   }
-
-   /* If the clear region is empty, just return. */
-   if (x0 == x1 || y0 == y1)
-      return;
-
-   bool can_fast_clear = !partial_clear;
-
-   if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR))
-      can_fast_clear = false;
-
-   uint8_t color_write_disable = 0;
-   if (set_write_disables(irb, GET_COLORMASK(ctx->Color.ColorMask, buf),
-                          &color_write_disable))
-      can_fast_clear = false;
-
-   /* We store clear colors as floats or uints as needed.  If there are
-    * texture views in play, the formats will not properly be respected
-    * during resolves because the resolve operations only know about the
-    * miptree and not the renderbuffer.
-    */
-   if (irb->Base.Base.Format != irb->mt->format)
-      can_fast_clear = false;
-
-   if (!irb->mt->supports_fast_clear ||
-       !brw_is_color_fast_clear_compatible(brw, irb->mt, &ctx->Color.ClearColor))
-      can_fast_clear = false;
-
-   /* Surface state can only record one fast clear color value. Therefore
-    * unless different levels/layers agree on the color it can be used to
-    * represent only a single level/layer. Here it will be reserved for the
-    * first slice (level 0, layer 0).
-    */
-   if (irb->layer_count > 1 || irb->mt_level || irb->mt_layer)
-      can_fast_clear = false;
-
-   unsigned level = irb->mt_level;
-   const unsigned num_layers = fb->MaxNumLayers ? irb->layer_count : 1;
-
-   /* If the MCS buffer hasn't been allocated yet, we need to allocate it now.
-    */
-   if (can_fast_clear && !irb->mt->aux_buf) {
-      assert(irb->mt->aux_usage == ISL_AUX_USAGE_CCS_D);
-      if (!brw_miptree_alloc_aux(brw, irb->mt)) {
-         /* We're out of memory. Fall back to a non-fast clear. */
-         can_fast_clear = false;
-      }
-   }
-
-   if (can_fast_clear) {
-      const enum isl_aux_state aux_state =
-         brw_miptree_get_aux_state(irb->mt, irb->mt_level, irb->mt_layer);
-      union isl_color_value clear_color =
-         brw_meta_convert_fast_clear_color(brw, irb->mt,
-                                           &ctx->Color.ClearColor);
-
-      /* If the buffer is already in ISL_AUX_STATE_CLEAR and the clear color
-       * hasn't changed, the clear is redundant and can be skipped.
-       */
-      if (!brw_miptree_set_clear_color(brw, irb->mt, clear_color) &&
-          aux_state == ISL_AUX_STATE_CLEAR) {
-         return;
-      }
-
-      DBG("%s (fast) to mt %p level %d layers %d+%d\n", __FUNCTION__,
-          irb->mt, irb->mt_level, irb->mt_layer, num_layers);
-
-      /* We can't set up the blorp_surf until we've allocated the MCS above */
-      struct blorp_surf surf;
-      blorp_surf_for_miptree(brw, &surf, irb->mt, irb->mt->aux_usage, true,
-                             &level, irb->mt_layer, num_layers);
-
-      /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
-       *
-       *    "Any transition from any value in {Clear, Render, Resolve} to a
-       *    different value in {Clear, Render, Resolve} requires end of pipe
-       *    synchronization."
-       *
-       * In other words, fast clear ops are not properly synchronized with
-       * other drawing.  We need to use a PIPE_CONTROL to ensure that the
-       * contents of the previous draw hit the render target before we resolve
-       * and again afterwards to ensure that the resolve is complete before we
-       * do any more regular drawing.
-       */
-      brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
-
-      struct blorp_batch batch;
-      blorp_batch_init(&brw->blorp, &batch, brw, 0);
-      blorp_fast_clear(&batch, &surf, isl_format_srgb_to_linear(isl_format),
-                       ISL_SWIZZLE_IDENTITY,
-                       level, irb->mt_layer, num_layers, x0, y0, x1, y1);
-      blorp_batch_finish(&batch);
-
-      brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
-
-      /* Now that the fast clear has occurred, put the buffer in
-       * ISL_AUX_STATE_CLEAR so that we won't waste time doing
-       * redundant clears.
- */ - brw_miptree_set_aux_state(brw, irb->mt, irb->mt_level, - irb->mt_layer, num_layers, - ISL_AUX_STATE_CLEAR); - } else { - DBG("%s (slow) to mt %p level %d layer %d+%d\n", __FUNCTION__, - irb->mt, irb->mt_level, irb->mt_layer, num_layers); - - enum isl_aux_usage aux_usage = - brw_miptree_render_aux_usage(brw, irb->mt, isl_format, false, false); - brw_miptree_prepare_render(brw, irb->mt, level, irb->mt_layer, - num_layers, aux_usage); - - struct blorp_surf surf; - blorp_surf_for_miptree(brw, &surf, irb->mt, aux_usage, true, - &level, irb->mt_layer, num_layers); - - union isl_color_value clear_color; - memcpy(clear_color.f32, ctx->Color.ClearColor.f, sizeof(float) * 4); - - struct blorp_batch batch; - blorp_batch_init(&brw->blorp, &batch, brw, 0); - blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY, - level, irb->mt_layer, num_layers, - x0, y0, x1, y1, - clear_color, color_write_disable); - blorp_batch_finish(&batch); - - brw_miptree_finish_render(brw, irb->mt, level, irb->mt_layer, - num_layers, aux_usage); - } - - return; -} - -void -brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb, - GLbitfield mask, bool partial_clear, bool encode_srgb) -{ - for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) { - struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf]; - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - - /* Only clear the buffers present in the provided mask */ - if (((1 << fb->_ColorDrawBufferIndexes[buf]) & mask) == 0) - continue; - - /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported, - * the framebuffer can be complete with some attachments missing. In - * this case the _ColorDrawBuffers pointer will be NULL. - */ - if (rb == NULL) - continue; - - do_single_blorp_clear(brw, fb, rb, buf, partial_clear, encode_srgb); - irb->need_downsample = true; - } - - return; -} - -void -brw_blorp_clear_depth_stencil(struct brw_context *brw, - struct gl_framebuffer *fb, - GLbitfield mask, bool partial_clear) -{ - const struct gl_context *ctx = &brw->ctx; - struct gl_renderbuffer *depth_rb = - fb->Attachment[BUFFER_DEPTH].Renderbuffer; - struct gl_renderbuffer *stencil_rb = - fb->Attachment[BUFFER_STENCIL].Renderbuffer; - - if (!depth_rb || ctx->Depth.Mask == GL_FALSE) - mask &= ~BUFFER_BIT_DEPTH; - - if (!stencil_rb || (ctx->Stencil.WriteMask[0] & 0xff) == 0) - mask &= ~BUFFER_BIT_STENCIL; - - if (!(mask & (BUFFER_BITS_DEPTH_STENCIL))) - return; - - uint32_t x0, x1, y0, y1, rb_height; - if (depth_rb) { - rb_height = depth_rb->Height; - if (stencil_rb) { - assert(depth_rb->Width == stencil_rb->Width); - assert(depth_rb->Height == stencil_rb->Height); - } - } else { - assert(stencil_rb); - rb_height = stencil_rb->Height; - } - - x0 = fb->_Xmin; - x1 = fb->_Xmax; - if (fb->FlipY) { - y0 = rb_height - fb->_Ymax; - y1 = rb_height - fb->_Ymin; - } else { - y0 = fb->_Ymin; - y1 = fb->_Ymax; - } - - /* If the clear region is empty, just return. */ - if (x0 == x1 || y0 == y1) - return; - - uint32_t level = 0, start_layer = 0, num_layers; - struct blorp_surf depth_surf, stencil_surf; - - struct brw_mipmap_tree *depth_mt = NULL; - if (mask & BUFFER_BIT_DEPTH) { - struct brw_renderbuffer *irb = brw_renderbuffer(depth_rb); - depth_mt = find_miptree(GL_DEPTH_BUFFER_BIT, irb); - - level = irb->mt_level; - start_layer = irb->mt_layer; - num_layers = fb->MaxNumLayers ? 
irb->layer_count : 1;
-
-      brw_miptree_prepare_depth(brw, depth_mt, level, start_layer, num_layers);
-
-      unsigned depth_level = level;
-      blorp_surf_for_miptree(brw, &depth_surf, depth_mt, depth_mt->aux_usage,
-                             true, &depth_level, start_layer, num_layers);
-      assert(depth_level == level);
-   }
-
-   uint8_t stencil_mask = 0;
-   struct brw_mipmap_tree *stencil_mt = NULL;
-   if (mask & BUFFER_BIT_STENCIL) {
-      struct brw_renderbuffer *irb = brw_renderbuffer(stencil_rb);
-      stencil_mt = find_miptree(GL_STENCIL_BUFFER_BIT, irb);
-
-      if (mask & BUFFER_BIT_DEPTH) {
-         assert(level == irb->mt_level);
-         assert(start_layer == irb->mt_layer);
-         assert(num_layers == (fb->MaxNumLayers ? irb->layer_count : 1));
-      }
-
-      level = irb->mt_level;
-      start_layer = irb->mt_layer;
-      num_layers = fb->MaxNumLayers ? irb->layer_count : 1;
-
-      stencil_mask = ctx->Stencil.WriteMask[0] & 0xff;
-
-      brw_miptree_prepare_access(brw, stencil_mt, level, 1,
-                                 start_layer, num_layers,
-                                 ISL_AUX_USAGE_NONE, false);
-
-      unsigned stencil_level = level;
-      blorp_surf_for_miptree(brw, &stencil_surf, stencil_mt,
-                             ISL_AUX_USAGE_NONE, true,
-                             &stencil_level, start_layer, num_layers);
-   }
-
-   assert((mask & BUFFER_BIT_DEPTH) || stencil_mask);
-
-   struct blorp_batch batch;
-   blorp_batch_init(&brw->blorp, &batch, brw, 0);
-   blorp_clear_depth_stencil(&batch, &depth_surf, &stencil_surf,
-                             level, start_layer, num_layers,
-                             x0, y0, x1, y1,
-                             (mask & BUFFER_BIT_DEPTH), ctx->Depth.Clear,
-                             stencil_mask, ctx->Stencil.Clear);
-   blorp_batch_finish(&batch);
-
-   if (mask & BUFFER_BIT_DEPTH) {
-      brw_miptree_finish_depth(brw, depth_mt, level,
-                               start_layer, num_layers, true);
-   }
-
-   if (stencil_mask) {
-      brw_miptree_finish_write(brw, stencil_mt, level,
-                               start_layer, num_layers,
-                               ISL_AUX_USAGE_NONE);
-   }
-}
-
-void
-brw_blorp_resolve_color(struct brw_context *brw, struct brw_mipmap_tree *mt,
-                        unsigned level, unsigned layer,
-                        enum isl_aux_op resolve_op)
-{
-   DBG("%s to mt %p level %u layer %u\n", __FUNCTION__, mt, level, layer);
-
-   const mesa_format format = _mesa_get_srgb_format_linear(mt->format);
-
-   struct blorp_surf surf;
-   blorp_surf_for_miptree(brw, &surf, mt, mt->aux_usage, true,
-                          &level, layer, 1 /* num_layers */);
-
-   /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
-    *
-    *    "Any transition from any value in {Clear, Render, Resolve} to a
-    *    different value in {Clear, Render, Resolve} requires end of pipe
-    *    synchronization."
-    *
-    * In other words, fast clear ops are not properly synchronized with
-    * other drawing.  We need to use a PIPE_CONTROL to ensure that the
-    * contents of the previous draw hit the render target before we resolve
-    * and again afterwards to ensure that the resolve is complete before we
-    * do any more regular drawing.
-    */
-   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
-
-   struct blorp_batch batch;
-   blorp_batch_init(&brw->blorp, &batch, brw, 0);
-   blorp_ccs_resolve(&batch, &surf, level, layer, 1,
-                     brw_blorp_to_isl_format(brw, format, true),
-                     resolve_op);
-   blorp_batch_finish(&batch);
-
-   /* See comment above */
-   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
-}
-
-void
-brw_blorp_mcs_partial_resolve(struct brw_context *brw,
-                              struct brw_mipmap_tree *mt,
-                              uint32_t start_layer, uint32_t num_layers)
-{
-   DBG("%s to mt %p layers %u-%u\n", __FUNCTION__, mt,
-       start_layer, start_layer + num_layers - 1);
-
-   assert(mt->aux_usage == ISL_AUX_USAGE_MCS);
-
-   const mesa_format format = _mesa_get_srgb_format_linear(mt->format);
-   enum isl_format isl_format = brw_blorp_to_isl_format(brw, format, true);
-
-   struct blorp_surf surf;
-   uint32_t level = 0;
-   blorp_surf_for_miptree(brw, &surf, mt, ISL_AUX_USAGE_MCS, true,
-                          &level, start_layer, num_layers);
-
-   struct blorp_batch batch;
-   blorp_batch_init(&brw->blorp, &batch, brw, 0);
-   blorp_mcs_partial_resolve(&batch, &surf, isl_format,
-                             start_layer, num_layers);
-   blorp_batch_finish(&batch);
-}
-
-/**
- * Perform a HiZ or depth resolve operation.
- *
- * For an overview of HiZ ops, see the following sections of the Sandy Bridge
- * PRM, Volume 1, Part 2:
- *   - 7.5.3.1 Depth Buffer Clear
- *   - 7.5.3.2 Depth Buffer Resolve
- *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
- */
-void
-brw_hiz_exec(struct brw_context *brw, struct brw_mipmap_tree *mt,
-             unsigned int level, unsigned int start_layer,
-             unsigned int num_layers, enum isl_aux_op op)
-{
-   assert(brw_miptree_level_has_hiz(mt, level));
-   assert(op != ISL_AUX_OP_NONE);
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-   const char *opname = NULL;
-
-   switch (op) {
-   case ISL_AUX_OP_FULL_RESOLVE:
-      opname = "depth resolve";
-      break;
-   case ISL_AUX_OP_AMBIGUATE:
-      opname = "hiz ambiguate";
-      break;
-   case ISL_AUX_OP_FAST_CLEAR:
-      opname = "depth clear";
-      break;
-   case ISL_AUX_OP_PARTIAL_RESOLVE:
-   case ISL_AUX_OP_NONE:
-      unreachable("Invalid HiZ op");
-   }
-
-   DBG("%s %s to mt %p level %d layers %d-%d\n",
-       __func__, opname, mt, level, start_layer, start_layer + num_layers - 1);
-
-   /* The following stalls and flushes are only documented to be required for
-    * HiZ clear operations.  However, they also seem to be required for
-    * resolve operations.
-    */
-   if (devinfo->ver == 6) {
-      /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
-       *
-       *   "If other rendering operations have preceded this clear, a
-       *    PIPE_CONTROL with write cache flush enabled and Z-inhibit
-       *    disabled must be issued before the rectangle primitive used for
-       *    the depth buffer clear operation."
-       */
-      brw_emit_pipe_control_flush(brw,
-                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
-                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-                                  PIPE_CONTROL_CS_STALL);
-   } else if (devinfo->ver >= 7) {
-      /*
-       * From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
-       *
-       *   If other rendering operations have preceded this clear, a
-       *   PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
-       *   enabled must be issued before the rectangle primitive used for
-       *   the depth buffer clear operation.
-       *
-       * Same applies for Gfx8 and Gfx9.
-       *
-       * In addition, from the Ivybridge PRM, volume 2, 1.10.4.1
-       * PIPE_CONTROL, Depth Cache Flush Enable:
-       *
-       *   This bit must not be set when Depth Stall Enable bit is set in
-       *   this packet.
-       *
-       * This is confirmed to hold in practice; violating it on HSW causes
-       * immediate GPU hangs.
-       *
-       * Therefore issue two pipe control flushes: one for the cache flush and
-       * another for the depth stall.
-       */
-      brw_emit_pipe_control_flush(brw,
-                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-                                  PIPE_CONTROL_CS_STALL);
-
-      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
-   }
-
-   assert(mt->aux_usage == ISL_AUX_USAGE_HIZ && mt->aux_buf);
-
-   struct blorp_surf surf;
-   blorp_surf_for_miptree(brw, &surf, mt, ISL_AUX_USAGE_HIZ, true,
-                          &level, start_layer, num_layers);
-
-   struct blorp_batch batch;
-   blorp_batch_init(&brw->blorp, &batch, brw,
-                    BLORP_BATCH_NO_UPDATE_CLEAR_COLOR);
-   blorp_hiz_op(&batch, &surf, level, start_layer, num_layers, op);
-   blorp_batch_finish(&batch);
-
-   /* The following stalls and flushes are only documented to be required for
-    * HiZ clear operations.  However, they also seem to be required for
-    * resolve operations.
-    */
-   if (devinfo->ver == 6) {
-      /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
-       *
-       *   "DevSNB, DevSNB-B{W/A}]: Depth buffer clear pass must be
-       *    followed by a PIPE_CONTROL command with DEPTH_STALL bit set
-       *    and Then followed by Depth FLUSH'
-       */
-      brw_emit_pipe_control_flush(brw,
-                                  PIPE_CONTROL_DEPTH_STALL);
-
-      brw_emit_pipe_control_flush(brw,
-                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-                                  PIPE_CONTROL_CS_STALL);
-   } else if (devinfo->ver >= 8) {
-      /*
-       * From the Broadwell PRM, volume 7, "Depth Buffer Clear":
-       *
-       *   "Depth buffer clear pass using any of the methods (WM_STATE,
-       *    3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
-       *    PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
-       *    "set" before starting to render.  DepthStall and DepthFlush are
-       *    not needed between consecutive depth clear passes nor is it
-       *    required if the depth clear pass was done with
-       *    'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP."
-       *
-       * TODO: As the spec says, this could be conditional.
-       */
-      brw_emit_pipe_control_flush(brw,
-                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-                                  PIPE_CONTROL_DEPTH_STALL);
-   }
-}
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
deleted file mode 100644
index 35822ab..0000000
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */ - -#ifndef BRW_BLORP_H -#define BRW_BLORP_H - -#include "blorp/blorp.h" -#include "brw_mipmap_tree.h" -#include "program/prog_instruction.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void brw_blorp_init(struct brw_context *brw); - -void -brw_blorp_blit_miptrees(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - unsigned src_level, unsigned src_layer, - mesa_format src_format, int src_swizzle, - struct brw_mipmap_tree *dst_mt, - unsigned dst_level, unsigned dst_layer, - mesa_format dst_format, - float src_x0, float src_y0, - float src_x1, float src_y1, - float dst_x0, float dst_y0, - float dst_x1, float dst_y1, - GLenum filter, bool mirror_x, bool mirror_y, - bool decode_srgb, bool encode_srgb); - -void -brw_blorp_copy_miptrees(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - unsigned src_level, unsigned src_logical_layer, - struct brw_mipmap_tree *dst_mt, - unsigned dst_level, unsigned dst_logical_layer, - unsigned src_x, unsigned src_y, - unsigned dst_x, unsigned dst_y, - unsigned src_width, unsigned src_height); - -void -brw_blorp_copy_buffers(struct brw_context *brw, - struct brw_bo *src_bo, - unsigned src_offset, - struct brw_bo *dst_bo, - unsigned dst_offset, - unsigned size); - -bool -brw_blorp_upload_miptree(struct brw_context *brw, - struct brw_mipmap_tree *dst_mt, - mesa_format dst_format, - uint32_t level, uint32_t x, uint32_t y, uint32_t z, - uint32_t width, uint32_t height, uint32_t depth, - GLenum target, GLenum format, GLenum type, - const void *pixels, - const struct gl_pixelstore_attrib *packing); - -bool -brw_blorp_download_miptree(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - mesa_format src_format, uint32_t src_swizzle, - uint32_t level, uint32_t x, uint32_t y, uint32_t z, - uint32_t width, uint32_t height, uint32_t depth, - GLenum target, GLenum format, GLenum type, - bool y_flip, const void *pixels, - const struct gl_pixelstore_attrib *packing); - -void -brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb, - GLbitfield mask, bool partial_clear, bool encode_srgb); -void -brw_blorp_clear_depth_stencil(struct brw_context *brw, - struct gl_framebuffer *fb, - GLbitfield mask, bool partial_clear); - -void -brw_blorp_resolve_color(struct brw_context *brw, - struct brw_mipmap_tree *mt, - unsigned level, unsigned layer, - enum isl_aux_op resolve_op); - -void -brw_blorp_mcs_partial_resolve(struct brw_context *brw, - struct brw_mipmap_tree *mt, - uint32_t start_layer, uint32_t num_layers); - -void -brw_hiz_exec(struct brw_context *brw, struct brw_mipmap_tree *mt, - unsigned int level, unsigned int start_layer, - unsigned int num_layers, enum isl_aux_op op); - -void gfx4_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); -void gfx45_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); -void gfx5_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); -void gfx6_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); -void gfx7_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); -void gfx75_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); -void gfx8_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); -void gfx9_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); -void gfx11_blorp_exec(struct blorp_batch *batch, - const struct blorp_params *params); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* BRW_BLORP_H */ diff 
--git a/src/mesa/drivers/dri/i965/brw_buffer_objects.c b/src/mesa/drivers/dri/i965/brw_buffer_objects.c deleted file mode 100644 index 929ff22..0000000 --- a/src/mesa/drivers/dri/i965/brw_buffer_objects.c +++ /dev/null @@ -1,710 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * @file brw_buffer_objects.c - * - * This provides core GL buffer object functionality. - */ - -#include "main/mtypes.h" -#include "main/macros.h" -#include "main/streaming-load-memcpy.h" -#include "main/bufferobj.h" -#include "x86/common_x86_asm.h" -#include "util/u_memory.h" - -#include "brw_context.h" -#include "brw_blorp.h" -#include "brw_buffer_objects.h" -#include "brw_batch.h" - -static void -mark_buffer_gpu_usage(struct brw_buffer_object *intel_obj, - uint32_t offset, uint32_t size) -{ - intel_obj->gpu_active_start = MIN2(intel_obj->gpu_active_start, offset); - intel_obj->gpu_active_end = MAX2(intel_obj->gpu_active_end, offset + size); -} - -static void -mark_buffer_inactive(struct brw_buffer_object *intel_obj) -{ - intel_obj->gpu_active_start = ~0; - intel_obj->gpu_active_end = 0; -} - -static void -mark_buffer_valid_data(struct brw_buffer_object *intel_obj, - uint32_t offset, uint32_t size) -{ - intel_obj->valid_data_start = MIN2(intel_obj->valid_data_start, offset); - intel_obj->valid_data_end = MAX2(intel_obj->valid_data_end, offset + size); -} - -static void -mark_buffer_invalid(struct brw_buffer_object *intel_obj) -{ - intel_obj->valid_data_start = ~0; - intel_obj->valid_data_end = 0; -} - -/** Allocates a new brw_bo to store the data for the buffer object. */ -static void -alloc_buffer_object(struct brw_context *brw, - struct brw_buffer_object *intel_obj) -{ - const struct gl_context *ctx = &brw->ctx; - - uint64_t size = intel_obj->Base.Size; - if (ctx->Const.RobustAccess) { - /* Pad out buffer objects with an extra 2kB (half a page). - * - * When pushing UBOs, we need to safeguard against 3DSTATE_CONSTANT_* - * reading out of bounds memory. The application might bind a UBO that's - * smaller than what the program expects. Ideally, we'd bind an extra - * push buffer containing zeros, but we have a limited number of those, - * so it's not always viable. Our only safe option is to pad all buffer - * objects by the maximum push data length, so that it will never read - * past the end of a BO. 
- * - * This is unfortunate, but it should result in at most 1 extra page, - * which probably isn't too terrible. - */ - size += 64 * 32; /* max read length of 64 256-bit units */ - } - intel_obj->buffer = - brw_bo_alloc(brw->bufmgr, "bufferobj", size, BRW_MEMZONE_OTHER); - - /* the buffer might be bound as a uniform buffer, need to update it - */ - if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; - if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; - if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER; - if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER) - brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; - - mark_buffer_inactive(intel_obj); - mark_buffer_invalid(intel_obj); -} - -static void -release_buffer(struct brw_buffer_object *intel_obj) -{ - brw_bo_unreference(intel_obj->buffer); - intel_obj->buffer = NULL; -} - -/** - * The NewBufferObject() driver hook. - * - * Allocates a new brw_buffer_object structure and initializes it. - * - * There is some duplication between mesa's bufferobjects and our - * bufmgr buffers. Both have an integer handle and a hashtable to - * lookup an opaque structure. It would be nice if the handles and - * internal structure where somehow shared. - */ -static struct gl_buffer_object * -brw_new_buffer_object(struct gl_context * ctx, GLuint name) -{ - struct brw_buffer_object *obj = CALLOC_STRUCT(brw_buffer_object); - if (!obj) { - _mesa_error_no_memory(__func__); - return NULL; - } - - _mesa_initialize_buffer_object(ctx, &obj->Base, name); - - obj->buffer = NULL; - - return &obj->Base; -} - -/** - * The DeleteBuffer() driver hook. - * - * Deletes a single OpenGL buffer object. Used by glDeleteBuffers(). - */ -static void -brw_delete_buffer(struct gl_context * ctx, struct gl_buffer_object *obj) -{ - struct brw_buffer_object *intel_obj = brw_buffer_object(obj); - - assert(intel_obj); - - /* Buffer objects are automatically unmapped when deleting according - * to the spec, but Mesa doesn't do UnmapBuffer for us at context destroy - * (though it does if you call glDeleteBuffers) - */ - _mesa_buffer_unmap_all_mappings(ctx, obj); - - brw_bo_unreference(intel_obj->buffer); - _mesa_delete_buffer_object(ctx, obj); -} - - -/** - * The BufferData() driver hook. - * - * Implements glBufferData(), which recreates a buffer object's data store - * and populates it with the given data, if present. - * - * Any data that was previously stored in the buffer object is lost. - * - * \return true for success, false if out of memory - */ -static GLboolean -brw_buffer_data(struct gl_context *ctx, - GLenum target, - GLsizeiptrARB size, - const GLvoid *data, - GLenum usage, - GLbitfield storageFlags, - struct gl_buffer_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_buffer_object *intel_obj = brw_buffer_object(obj); - - /* Part of the ABI, but this function doesn't use it. 
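A note on the arithmetic above: 64 * 32 is the largest read 3DSTATE_CONSTANT_* can issue (64 registers of 256 bits each), so padding by that amount guarantees an out-of-bounds push never walks past the end of the BO. A minimal sketch of the size computation, with a hypothetical helper name:

   #include <stdbool.h>
   #include <stdint.h>

   /* Maximum push-constant read length: 64 registers of 256 bits (32
    * bytes) each, mirroring the pad applied in alloc_buffer_object().
    */
   #define MAX_PUSH_READ_BYTES (64 * 32)

   /* Hypothetical helper: bytes to allocate for a buffer of logical size
    * `size` when robust buffer access is enabled.  Out-of-bounds pushes
    * then land in the pad instead of faulting.
    */
   static uint64_t
   padded_bo_size(uint64_t size, bool robust_access)
   {
      return robust_access ? size + MAX_PUSH_READ_BYTES : size;
   }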
- */ - (void) target; - - intel_obj->Base.Size = size; - intel_obj->Base.Usage = usage; - intel_obj->Base.StorageFlags = storageFlags; - - assert(!obj->Mappings[MAP_USER].Pointer); /* Mesa should have unmapped it */ - assert(!obj->Mappings[MAP_INTERNAL].Pointer); - - if (intel_obj->buffer != NULL) - release_buffer(intel_obj); - - if (size != 0) { - alloc_buffer_object(brw, intel_obj); - if (!intel_obj->buffer) - return false; - - if (data != NULL) { - brw_bo_subdata(intel_obj->buffer, 0, size, data); - mark_buffer_valid_data(intel_obj, 0, size); - } - } - - return true; -} - -static GLboolean -brw_buffer_data_mem(struct gl_context *ctx, - GLenum target, - GLsizeiptrARB size, - struct gl_memory_object *memObj, - GLuint64 offset, - GLenum usage, - struct gl_buffer_object *bufObj) -{ - struct brw_buffer_object *intel_obj = brw_buffer_object(bufObj); - struct brw_memory_object *intel_memObj = brw_memory_object(memObj); - - /* Part of the ABI, but this function doesn't use it. - */ - (void) target; - - intel_obj->Base.Size = size; - intel_obj->Base.Usage = usage; - intel_obj->Base.StorageFlags = 0; - - assert(!bufObj->Mappings[MAP_USER].Pointer); /* Mesa should have unmapped it */ - assert(!bufObj->Mappings[MAP_INTERNAL].Pointer); - - if (intel_obj->buffer != NULL) - release_buffer(intel_obj); - - if (size != 0) { - intel_obj->buffer = intel_memObj->bo; - mark_buffer_valid_data(intel_obj, offset, size); - } - - return true; -} - -/** - * The BufferSubData() driver hook. - * - * Implements glBufferSubData(), which replaces a portion of the data in a - * buffer object. - * - * If the data range specified by (size + offset) extends beyond the end of - * the buffer or if data is NULL, no copy is performed. - */ -static void -brw_buffer_subdata(struct gl_context *ctx, - GLintptrARB offset, - GLsizeiptrARB size, - const GLvoid *data, - struct gl_buffer_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_buffer_object *intel_obj = brw_buffer_object(obj); - bool busy; - - if (size == 0) - return; - - assert(intel_obj); - - /* See if we can unsynchronized write the data into the user's BO. This - * avoids GPU stalls in unfortunately common user patterns (uploading - * sequentially into a BO, with draw calls in between each upload). - * - * Once we've hit this path, we mark this GL BO as preferring stalling to - * blits, so that we can hopefully hit this path again in the future - * (otherwise, an app that might occasionally stall but mostly not will end - * up with blitting all the time, at the cost of bandwidth) - */ - if (offset + size <= intel_obj->gpu_active_start || - intel_obj->gpu_active_end <= offset || - offset + size <= intel_obj->valid_data_start || - intel_obj->valid_data_end <= offset) { - void *map = brw_bo_map(brw, intel_obj->buffer, MAP_WRITE | MAP_ASYNC); - memcpy(map + offset, data, size); - brw_bo_unmap(intel_obj->buffer); - - if (intel_obj->gpu_active_end > intel_obj->gpu_active_start) - intel_obj->prefer_stall_to_blit = true; - - mark_buffer_valid_data(intel_obj, offset, size); - return; - } - - busy = - brw_bo_busy(intel_obj->buffer) || - brw_batch_references(&brw->batch, intel_obj->buffer); - - if (busy) { - if (size == intel_obj->Base.Size || - (intel_obj->valid_data_start >= offset && - intel_obj->valid_data_end <= offset + size)) { - /* Replace the current busy bo so the subdata doesn't stall. 
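The BufferData() path above is also what makes client-side "orphaning" cheap: handing glBufferData() a NULL pointer drops the old store without waiting on the GPU. A sketch of that application-side idiom, using only standard GL 1.5 entry points (this is client code, not part of the driver):

   #include <GL/gl.h>
   #include <GL/glext.h>

   /* Orphan a busy VBO: glBufferData(..., NULL, ...) asks the driver for
    * a fresh data store (brw_buffer_data above releases the old brw_bo
    * and allocates a new one), so the upload never stalls on in-flight
    * GPU work that still reads the old store.
    */
   static void
   upload_orphaned(GLuint vbo, GLsizeiptr size, const void *data)
   {
      glBindBuffer(GL_ARRAY_BUFFER, vbo);
      glBufferData(GL_ARRAY_BUFFER, size, NULL, GL_STREAM_DRAW); /* orphan */
      glBufferSubData(GL_ARRAY_BUFFER, 0, size, data);           /* fill   */
   }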
*/ - brw_bo_unreference(intel_obj->buffer); - alloc_buffer_object(brw, intel_obj); - } else if (!intel_obj->prefer_stall_to_blit) { - perf_debug("Using a blit copy to avoid stalling on " - "glBufferSubData(%ld, %ld) (%ldkb) to a busy " - "(%d-%d) / valid (%d-%d) buffer object.\n", - (long)offset, (long)offset + size, (long)(size/1024), - intel_obj->gpu_active_start, - intel_obj->gpu_active_end, - intel_obj->valid_data_start, - intel_obj->valid_data_end); - struct brw_bo *temp_bo = - brw_bo_alloc(brw->bufmgr, "subdata temp", size, BRW_MEMZONE_OTHER); - - brw_bo_subdata(temp_bo, 0, size, data); - - brw_blorp_copy_buffers(brw, - temp_bo, 0, - intel_obj->buffer, offset, - size); - brw_emit_mi_flush(brw); - - brw_bo_unreference(temp_bo); - mark_buffer_valid_data(intel_obj, offset, size); - return; - } else { - perf_debug("Stalling on glBufferSubData(%ld, %ld) (%ldkb) to a busy " - "(%d-%d) buffer object. Use glMapBufferRange() to " - "avoid this.\n", - (long)offset, (long)offset + size, (long)(size/1024), - intel_obj->gpu_active_start, - intel_obj->gpu_active_end); - brw_batch_flush(brw); - } - } - - brw_bo_subdata(intel_obj->buffer, offset, size, data); - mark_buffer_inactive(intel_obj); - mark_buffer_valid_data(intel_obj, offset, size); -} - -/* Typedef for memcpy function (used in brw_get_buffer_subdata below). */ -typedef void *(*mem_copy_fn)(void *dest, const void *src, size_t n); - -/** - * The GetBufferSubData() driver hook. - * - * Implements glGetBufferSubData(), which copies a subrange of a buffer - * object into user memory. - */ -static void -brw_get_buffer_subdata(struct gl_context *ctx, - GLintptrARB offset, - GLsizeiptrARB size, - GLvoid *data, - struct gl_buffer_object *obj) -{ - struct brw_buffer_object *intel_obj = brw_buffer_object(obj); - struct brw_context *brw = brw_context(ctx); - - assert(intel_obj); - if (brw_batch_references(&brw->batch, intel_obj->buffer)) { - brw_batch_flush(brw); - } - - unsigned int map_flags = MAP_READ; - mem_copy_fn memcpy_fn = memcpy; -#ifdef USE_SSE41 - if (!intel_obj->buffer->cache_coherent && cpu_has_sse4_1) { - /* Rather than acquire a new WB mmaping of the buffer object and pull - * it into the CPU cache, keep using the WC mmap that we have for writes, - * and use the magic movntd instructions instead. - */ - map_flags |= MAP_COHERENT; - memcpy_fn = (mem_copy_fn) _mesa_streaming_load_memcpy; - } -#endif - - void *map = brw_bo_map(brw, intel_obj->buffer, map_flags); - if (unlikely(!map)) { - _mesa_error_no_memory(__func__); - return; - } - memcpy_fn(data, map + offset, size); - brw_bo_unmap(intel_obj->buffer); - - mark_buffer_inactive(intel_obj); -} - - -/** - * The MapBufferRange() driver hook. - * - * This implements both glMapBufferRange() and glMapBuffer(). - * - * The goal of this extension is to allow apps to accumulate their rendering - * at the same time as they accumulate their buffer object. Without it, - * you'd end up blocking on execution of rendering every time you mapped - * the buffer to put new data in. - * - * We support it in 3 ways: If unsynchronized, then don't bother - * flushing the batchbuffer before mapping the buffer, which can save blocking - * in many cases. If we would still block, and they allow the whole buffer - * to be invalidated, then just allocate a new buffer to replace the old one. - * If not, and we'd block, and they allow the subrange of the buffer to be - * invalidated, then we can make a new little BO, let them write into that, - * and blit it into the real BO at unmap time. 
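Laid end to end, brw_buffer_subdata() above is a four-way decision. A schematic restatement under simplified assumptions (hypothetical names; ranges are half-open, and the valid-data test is folded into the single overlap test):

   #include <stdbool.h>
   #include <stdint.h>

   enum subdata_path {
      PATH_UNSYNC_WRITE,   /* no overlap with busy range: write in place */
      PATH_REPLACE_BO,     /* whole store rewritten: orphan the BO       */
      PATH_BLIT,           /* staging BO plus GPU copy                   */
      PATH_STALL,          /* flush and wait                             */
   };

   /* Hypothetical condensation of the heuristic above.  `busy` means the
    * BO is referenced by unflushed or executing GPU work.
    */
   static enum subdata_path
   choose_subdata_path(uint32_t off, uint32_t size, uint32_t buf_size,
                       uint32_t active_start, uint32_t active_end,
                       bool busy, bool prefer_stall)
   {
      if (off + size <= active_start || active_end <= off)
         return PATH_UNSYNC_WRITE;
      if (!busy)
         return PATH_UNSYNC_WRITE;
      if (size == buf_size)
         return PATH_REPLACE_BO;
      return prefer_stall ? PATH_STALL : PATH_BLIT;
   }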
- */ -static void * -brw_map_buffer_range(struct gl_context *ctx, - GLintptr offset, GLsizeiptr length, - GLbitfield access, struct gl_buffer_object *obj, - gl_map_buffer_index index) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_buffer_object *intel_obj = brw_buffer_object(obj); - - assert(intel_obj); - - STATIC_ASSERT(GL_MAP_UNSYNCHRONIZED_BIT == MAP_ASYNC); - STATIC_ASSERT(GL_MAP_WRITE_BIT == MAP_WRITE); - STATIC_ASSERT(GL_MAP_READ_BIT == MAP_READ); - STATIC_ASSERT(GL_MAP_PERSISTENT_BIT == MAP_PERSISTENT); - STATIC_ASSERT(GL_MAP_COHERENT_BIT == MAP_COHERENT); - assert((access & MAP_INTERNAL_MASK) == 0); - - /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also - * internally uses our functions directly. - */ - obj->Mappings[index].Offset = offset; - obj->Mappings[index].Length = length; - obj->Mappings[index].AccessFlags = access; - - if (intel_obj->buffer == NULL) { - obj->Mappings[index].Pointer = NULL; - return NULL; - } - - /* If the access is synchronized (like a normal buffer mapping), then get - * things flushed out so the later mapping syncs appropriately through GEM. - * If the user doesn't care about existing buffer contents and mapping would - * cause us to block, then throw out the old buffer. - * - * If they set INVALIDATE_BUFFER, we can pitch the current contents to - * achieve the required synchronization. - */ - if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) { - if (brw_batch_references(&brw->batch, intel_obj->buffer)) { - if (access & GL_MAP_INVALIDATE_BUFFER_BIT) { - brw_bo_unreference(intel_obj->buffer); - alloc_buffer_object(brw, intel_obj); - } else { - perf_debug("Stalling on the GPU for mapping a busy buffer " - "object\n"); - brw_batch_flush(brw); - } - } else if (brw_bo_busy(intel_obj->buffer) && - (access & GL_MAP_INVALIDATE_BUFFER_BIT)) { - brw_bo_unreference(intel_obj->buffer); - alloc_buffer_object(brw, intel_obj); - } - } - - if (access & MAP_WRITE) - mark_buffer_valid_data(intel_obj, offset, length); - - /* If the user is mapping a range of an active buffer object but - * doesn't require the current contents of that range, make a new - * BO, and we'll copy what they put in there out at unmap or - * FlushRange time. - * - * That is, unless they're looking for a persistent mapping -- we would - * need to do blits in the MemoryBarrier call, and it's easier to just do a - * GPU stall and do a mapping. - */ - if (!(access & (GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_PERSISTENT_BIT)) && - (access & GL_MAP_INVALIDATE_RANGE_BIT) && - brw_bo_busy(intel_obj->buffer)) { - /* Ensure that the base alignment of the allocation meets the alignment - * guarantees the driver has advertised to the application. - */ - const unsigned alignment = ctx->Const.MinMapBufferAlignment; - - intel_obj->map_extra[index] = (uintptr_t) offset % alignment; - intel_obj->range_map_bo[index] = - brw_bo_alloc(brw->bufmgr, "BO blit temp", - length + intel_obj->map_extra[index], - BRW_MEMZONE_OTHER); - void *map = brw_bo_map(brw, intel_obj->range_map_bo[index], access); - obj->Mappings[index].Pointer = map + intel_obj->map_extra[index]; - return obj->Mappings[index].Pointer; - } - - void *map = brw_bo_map(brw, intel_obj->buffer, access); - if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) { - mark_buffer_inactive(intel_obj); - } - - obj->Mappings[index].Pointer = map + offset; - return obj->Mappings[index].Pointer; -} - -/** - * The FlushMappedBufferRange() driver hook. 
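The mapping policy described above reduces to a small decision table. A condensed sketch, assuming GL 3.x header definitions for the GL_MAP_* bits and a hypothetical chooser function:

   #include <stdbool.h>
   #include <GL/gl.h>
   #include <GL/glext.h>

   enum map_path {
      MAP_DIRECT_UNSYNC,   /* map now, no synchronization            */
      MAP_ORPHAN,          /* replace the BO, map the fresh one      */
      MAP_TEMP_BO,         /* map a staging BO, blit back at unmap   */
      MAP_STALL_DIRECT,    /* flush, wait, then map the real BO      */
   };

   /* Hypothetical condensation of brw_map_buffer_range()'s policy.
    * `busy` means the BO is referenced by unflushed or executing work.
    */
   static enum map_path
   choose_map_path(GLbitfield access, bool busy)
   {
      if (access & GL_MAP_UNSYNCHRONIZED_BIT)
         return MAP_DIRECT_UNSYNC;              /* caller takes the risk */
      if (busy && (access & GL_MAP_INVALIDATE_BUFFER_BIT))
         return MAP_ORPHAN;
      if (busy && (access & GL_MAP_INVALIDATE_RANGE_BIT) &&
          !(access & GL_MAP_PERSISTENT_BIT))
         return MAP_TEMP_BO;
      return MAP_STALL_DIRECT;  /* the wait is a no-op if the BO is idle */
   }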
- *
- * Implements glFlushMappedBufferRange(), which signifies that modifications
- * have been made to a range of a mapped buffer, and it should be flushed.
- *
- * This is only used for buffers mapped with GL_MAP_FLUSH_EXPLICIT_BIT.
- *
- * Ideally we'd use a BO to avoid taking up cache space for the temporary
- * data, but FlushMappedBufferRange may be followed by further writes to
- * the pointer, so we would have to re-map after emitting our blit, which
- * would defeat the point.
- */
-static void
-brw_flush_mapped_buffer_range(struct gl_context *ctx,
-                              GLintptr offset, GLsizeiptr length,
-                              struct gl_buffer_object *obj,
-                              gl_map_buffer_index index)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
-
-   assert(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT);
-
-   /* If we gave a direct mapping of the buffer instead of using a temporary,
-    * then there's nothing to do.
-    */
-   if (intel_obj->range_map_bo[index] == NULL)
-      return;
-
-   if (length == 0)
-      return;
-
-   /* Note that we're not unmapping our buffer while executing the blit.  We
-    * need to have a mapping still at the end of this call, since the user
-    * gets to make further modifications and glFlushMappedBufferRange() calls.
-    * This is safe, because:
-    *
-    * - On LLC platforms, we're using a CPU mapping that's coherent with the
-    *   GPU (except for the render caches), so the kernel doesn't need to do
-    *   any flushing work for us except for what happens at batch exec time
-    *   anyway.
-    *
-    * - On non-LLC platforms, we're using a GTT mapping that writes directly
-    *   to system memory (except for the chipset cache that gets flushed at
-    *   batch exec time).
-    *
-    * In both cases we don't need to stall for the previous blit to complete
-    * so we can re-map (and we definitely don't want to, since that would be
-    * slow): If the user edits a part of their buffer that's previously been
-    * blitted, then our lack of synchronization is fine, because either
-    * they'll get some too-new data in the first blit and not do another blit
-    * of that area (but in that case the results are undefined), or they'll do
-    * another blit of that area and the complete newer data will land the
-    * second time.
-    */
-   brw_blorp_copy_buffers(brw,
-                          intel_obj->range_map_bo[index],
-                          intel_obj->map_extra[index] + offset,
-                          intel_obj->buffer,
-                          obj->Mappings[index].Offset + offset,
-                          length);
-   mark_buffer_gpu_usage(intel_obj,
-                         obj->Mappings[index].Offset + offset,
-                         length);
-   brw_emit_mi_flush(brw);
-}
-
-
-/**
- * The UnmapBuffer() driver hook.
- *
- * Implements glUnmapBuffer().
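The offset bookkeeping in the flush-range blit above is the subtle part: the temporary BO is over-allocated by map_extra bytes of alignment slack, so both copy offsets must be shifted. A trivial restatement of that arithmetic (hypothetical struct and function names):

   #include <stdint.h>

   /* Hypothetical restatement of the blit offsets used above.  The temp
    * BO holds `map_extra` slack bytes followed by the mapped range; the
    * real BO holds the range at the mapping's Offset.  Flushing the
    * sub-range [offset, offset+length) of the mapping therefore copies
    * from map_extra + offset to mapping_offset + offset.
    */
   struct flush_offsets { uint64_t src, dst; };

   static struct flush_offsets
   flush_range_offsets(uint64_t map_extra, uint64_t mapping_offset,
                       uint64_t offset)
   {
      return (struct flush_offsets) {
         .src = map_extra + offset,
         .dst = mapping_offset + offset,
      };
   }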
- */ -static GLboolean -brw_unmap_buffer(struct gl_context *ctx, - struct gl_buffer_object *obj, - gl_map_buffer_index index) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_buffer_object *intel_obj = brw_buffer_object(obj); - - assert(intel_obj); - assert(obj->Mappings[index].Pointer); - if (intel_obj->range_map_bo[index] != NULL) { - brw_bo_unmap(intel_obj->range_map_bo[index]); - - if (!(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT)) { - brw_blorp_copy_buffers(brw, - intel_obj->range_map_bo[index], - intel_obj->map_extra[index], - intel_obj->buffer, obj->Mappings[index].Offset, - obj->Mappings[index].Length); - mark_buffer_gpu_usage(intel_obj, obj->Mappings[index].Offset, - obj->Mappings[index].Length); - brw_emit_mi_flush(brw); - } - - /* Since we've emitted some blits to buffers that will (likely) be used - * in rendering operations in other cache domains in this batch, emit a - * flush. Once again, we wish for a domain tracker in libdrm to cover - * usage inside of a batchbuffer. - */ - - brw_bo_unreference(intel_obj->range_map_bo[index]); - intel_obj->range_map_bo[index] = NULL; - } else if (intel_obj->buffer != NULL) { - brw_bo_unmap(intel_obj->buffer); - } - obj->Mappings[index].Pointer = NULL; - obj->Mappings[index].Offset = 0; - obj->Mappings[index].Length = 0; - - return true; -} - -/** - * Gets a pointer to the object's BO, and marks the given range as being used - * on the GPU. - * - * Anywhere that uses buffer objects in the pipeline should be using this to - * mark the range of the buffer that is being accessed by the pipeline. - */ -struct brw_bo * -brw_bufferobj_buffer(struct brw_context *brw, - struct brw_buffer_object *intel_obj, - uint32_t offset, uint32_t size, bool write) -{ - /* This is needed so that things like transform feedback and texture buffer - * objects that need a BO but don't want to check that they exist for - * draw-time validation can just always get a BO from a GL buffer object. - */ - if (intel_obj->buffer == NULL) - alloc_buffer_object(brw, intel_obj); - - mark_buffer_gpu_usage(intel_obj, offset, size); - - /* If writing, (conservatively) mark this section as having valid data. */ - if (write) - mark_buffer_valid_data(intel_obj, offset, size); - - return intel_obj->buffer; -} - -/** - * The CopyBufferSubData() driver hook. - * - * Implements glCopyBufferSubData(), which copies a portion of one buffer - * object's data to another. Independent source and destination offsets - * are allowed. - */ -static void -brw_copy_buffer_subdata(struct gl_context *ctx, - struct gl_buffer_object *src, - struct gl_buffer_object *dst, - GLintptr read_offset, GLintptr write_offset, - GLsizeiptr size) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_buffer_object *intel_src = brw_buffer_object(src); - struct brw_buffer_object *intel_dst = brw_buffer_object(dst); - struct brw_bo *src_bo, *dst_bo; - - if (size == 0) - return; - - dst_bo = brw_bufferobj_buffer(brw, intel_dst, write_offset, size, true); - src_bo = brw_bufferobj_buffer(brw, intel_src, read_offset, size, false); - - brw_blorp_copy_buffers(brw, - src_bo, read_offset, - dst_bo, write_offset, size); - - /* Since we've emitted some blits to buffers that will (likely) be used - * in rendering operations in other cache domains in this batch, emit a - * flush. Once again, we wish for a domain tracker in libdrm to cover - * usage inside of a batchbuffer. 
- */ - brw_emit_mi_flush(brw); -} - -void -brw_init_buffer_object_functions(struct dd_function_table *functions) -{ - functions->NewBufferObject = brw_new_buffer_object; - functions->DeleteBuffer = brw_delete_buffer; - functions->BufferData = brw_buffer_data; - functions->BufferDataMem = brw_buffer_data_mem; - functions->BufferSubData = brw_buffer_subdata; - functions->GetBufferSubData = brw_get_buffer_subdata; - functions->MapBufferRange = brw_map_buffer_range; - functions->FlushMappedBufferRange = brw_flush_mapped_buffer_range; - functions->UnmapBuffer = brw_unmap_buffer; - functions->CopyBufferSubData = brw_copy_buffer_subdata; -} diff --git a/src/mesa/drivers/dri/i965/brw_buffer_objects.h b/src/mesa/drivers/dri/i965/brw_buffer_objects.h deleted file mode 100644 index 3ed0930..0000000 --- a/src/mesa/drivers/dri/i965/brw_buffer_objects.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright 2005 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef BRW_BUFFEROBJ_H -#define BRW_BUFFEROBJ_H - -#include "main/mtypes.h" - -struct brw_context; -struct gl_buffer_object; - - -/** - * Intel vertex/pixel buffer object, derived from Mesa's gl_buffer_object. - */ -struct brw_buffer_object -{ - struct gl_buffer_object Base; - struct brw_bo *buffer; /* the low-level buffer manager's buffer handle */ - - struct brw_bo *range_map_bo[MAP_COUNT]; - - /** - * Alignment offset from the range_map_bo temporary mapping to the returned - * obj->Pointer (caused by GL_ARB_map_buffer_alignment). - */ - unsigned map_extra[MAP_COUNT]; - - /** @{ - * Tracking for what range of the BO may currently be in use by the GPU. - * - * Users often want to either glBufferSubData() or glMapBufferRange() a - * buffer object where some subset of it is busy on the GPU, without either - * stalling or doing an extra blit (since our blits are extra expensive, - * given that we have to reupload most of the 3D state when switching - * rings). We wish they'd just use glMapBufferRange() with the - * UNSYNC|INVALIDATE_RANGE flag or the INVALIDATE_BUFFER flag, but lots - * don't. - * - * To work around apps, we track what range of the BO we might have used on - * the GPU as vertex data, tranform feedback output, buffer textures, etc., - * and just do glBufferSubData() with an unsynchronized map when they're - * outside of that range. 
- * - * If gpu_active_start > gpu_active_end, then the GPU is not currently - * accessing the BO (and we can map it without synchronization). - */ - uint32_t gpu_active_start; - uint32_t gpu_active_end; - - /** @{ - * Tracking for what range of the BO may contain valid data. - * - * Users may create a large buffer object and only fill part of it - * with valid data. This is a conservative estimate of what part - * of the buffer contains valid data that we have to preserve. - */ - uint32_t valid_data_start; - uint32_t valid_data_end; - /** @} */ - - /** - * If we've avoided stalls/blits using the active tracking, flag the buffer - * for (occasional) stalling in the future to avoid getting stuck in a - * cycle of blitting on buffer wraparound. - */ - bool prefer_stall_to_blit; - /** @} */ -}; - - -/* Get the bm buffer associated with a GL bufferobject: - */ -struct brw_bo *brw_bufferobj_buffer(struct brw_context *brw, - struct brw_buffer_object *obj, - uint32_t offset, - uint32_t size, - bool write); - -void brw_upload_data(struct brw_uploader *upload, - const void *data, - uint32_t size, - uint32_t alignment, - struct brw_bo **out_bo, - uint32_t *out_offset); - -void *brw_upload_space(struct brw_uploader *upload, - uint32_t size, - uint32_t alignment, - struct brw_bo **out_bo, - uint32_t *out_offset); - -void brw_upload_finish(struct brw_uploader *upload); -void brw_upload_init(struct brw_uploader *upload, - struct brw_bufmgr *bufmgr, - unsigned default_size); - -/* Hook the bufferobject implementation into mesa: - */ -void brw_init_buffer_object_functions(struct dd_function_table *functions); - -static inline struct brw_buffer_object * -brw_buffer_object(struct gl_buffer_object *obj) -{ - return (struct brw_buffer_object *) obj; -} - -struct brw_memory_object { - struct gl_memory_object Base; - struct brw_bo *bo; -}; - -static inline struct brw_memory_object * -brw_memory_object(struct gl_memory_object *obj) -{ - return (struct brw_memory_object *)obj; -} - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_buffers.c b/src/mesa/drivers/dri/i965/brw_buffers.c deleted file mode 100644 index 55b6925..0000000 --- a/src/mesa/drivers/dri/i965/brw_buffers.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
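The inverted-range convention documented above (start > end means "idle") is what lets the glBufferSubData() path test for overlap with a single pair of comparisons. A self-contained sketch of the tracking and the safety test, with hypothetical names:

   #include <stdbool.h>
   #include <stdint.h>

   /* Hypothetical condensation of the gpu_active_start/end tracking: the
    * range starts inverted (~0, 0), meaning the GPU is not using the BO.
    */
   struct busy_range { uint32_t start, end; };

   static void
   range_mark(struct busy_range *r, uint32_t off, uint32_t size)
   {
      r->start = off < r->start ? off : r->start;
      r->end = off + size > r->end ? off + size : r->end;
   }

   /* A CPU write to [off, off+size) needs no synchronization iff it does
    * not overlap the busy range; an inverted range overlaps nothing.
    */
   static bool
   range_write_is_safe(const struct busy_range *r,
                       uint32_t off, uint32_t size)
   {
      return off + size <= r->start || r->end <= off;
   }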
- */ - -#include "brw_context.h" -#include "brw_buffers.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" - -#include "main/fbobject.h" -#include "main/framebuffer.h" -#include "main/renderbuffer.h" - -static void -brw_drawbuffer(struct gl_context *ctx) -{ - if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) { - struct brw_context *const brw = brw_context(ctx); - - /* If we might be front-buffer rendering on this buffer for the first - * time, invalidate our DRI drawable so we'll ask for new buffers - * (including the fake front) before we start rendering again. - */ - if (brw->driContext->driDrawablePriv) - dri2InvalidateDrawable(brw->driContext->driDrawablePriv); - brw_prepare_render(brw); - } -} - - -static void -brw_readbuffer(struct gl_context * ctx, GLenum mode) -{ - if (_mesa_is_front_buffer_reading(ctx->ReadBuffer)) { - struct brw_context *const brw = brw_context(ctx); - - /* If we might be front-buffer reading on this buffer for the first - * time, invalidate our DRI drawable so we'll ask for new buffers - * (including the fake front) before we start reading again. - */ - if (brw->driContext->driDrawablePriv) - dri2InvalidateDrawable(brw->driContext->driReadablePriv); - brw_prepare_render(brw); - } -} - - -void -brw_init_buffer_functions(struct dd_function_table *functions) -{ - functions->DrawBuffer = brw_drawbuffer; - functions->ReadBuffer = brw_readbuffer; -} diff --git a/src/mesa/drivers/dri/i965/brw_buffers.h b/src/mesa/drivers/dri/i965/brw_buffers.h deleted file mode 100644 index 37c385f..0000000 --- a/src/mesa/drivers/dri/i965/brw_buffers.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef BRW_BUFFERS_H -#define BRW_BUFFERS_H - -#include "dri_util.h" -#include "drm-uapi/drm.h" -#include "brw_context.h" - -extern void brw_init_buffer_functions(struct dd_function_table *functions); - -#endif /* BRW_BUFFERS_H */ diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c deleted file mode 100644 index b62d213..0000000 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ /dev/null @@ -1,1967 +0,0 @@ -/* - * Copyright © 2007 Red Hat Inc. - * Copyright © 2007-2017 Intel Corporation - * Copyright © 2006 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/* - * Authors: Thomas Hellström - * Keith Whitwell - * Eric Anholt - * Dave Airlie - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "errno.h" -#include "common/intel_clflush.h" -#include "dev/intel_debug.h" -#include "common/intel_gem.h" -#include "dev/intel_device_info.h" -#include "libdrm_macros.h" -#include "main/macros.h" -#include "util/macros.h" -#include "util/hash_table.h" -#include "util/list.h" -#include "util/os_file.h" -#include "util/u_dynarray.h" -#include "util/vma.h" -#include "brw_bufmgr.h" -#include "brw_context.h" -#include "string.h" - -#include "drm-uapi/i915_drm.h" - -#ifdef HAVE_VALGRIND -#include -#include -#define VG(x) x -#else -#define VG(x) -#endif - -/* Bufmgr is not aware of brw_context. */ -#undef WARN_ONCE -#define WARN_ONCE(cond, fmt...) do { \ - if (unlikely(cond)) { \ - static bool _warned = false; \ - if (!_warned) { \ - fprintf(stderr, "WARNING: "); \ - fprintf(stderr, fmt); \ - _warned = true; \ - } \ - } \ -} while (0) - - -/* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier - * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is - * leaked. All because it does not call VG(cli_free) from its - * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like - * and allocation, we mark it available for use upon mmapping and remove - * it upon unmapping. - */ -#define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size)) -#define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size)) - -/* On FreeBSD PAGE_SIZE is already defined in - * /usr/include/machine/param.h that is indirectly - * included here. - */ -#ifndef PAGE_SIZE -#define PAGE_SIZE 4096 -#endif - -#define FILE_DEBUG_FLAG DEBUG_BUFMGR - -static inline int -atomic_add_unless(int *v, int add, int unless) -{ - int c, old; - c = p_atomic_read(v); - while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c) - c = old; - return c == unless; -} - -/** - * i965 fixed-size bucketing VMA allocator. - * - * The BO cache maintains "cache buckets" for buffers of various sizes. - * All buffers in a given bucket are identically sized - when allocating, - * we always round up to the bucket size. 
This means that virtually all - * allocations are fixed-size; only buffers which are too large to fit in - * a bucket can be variably-sized. - * - * We create an allocator for each bucket. Each contains a free-list, where - * each node contains a pair. Each bit - * represents a bucket-sized block of memory. (At the first level, each - * bit corresponds to a page. For the second bucket, bits correspond to - * two pages, and so on.) 1 means a block is free, and 0 means it's in-use. - * The lowest bit in the bitmap is for the first block. - * - * This makes allocations cheap - any bit of any node will do. We can pick - * the head of the list and use ffs() to find a free block. If there are - * none, we allocate 64 blocks from a larger allocator - either a bigger - * bucketing allocator, or a fallback top-level allocator for large objects. - */ -struct vma_bucket_node { - uint64_t start_address; - uint64_t bitmap; -}; - -struct bo_cache_bucket { - /** List of cached BOs. */ - struct list_head head; - - /** Size of this bucket, in bytes. */ - uint64_t size; - - /** List of vma_bucket_nodes. */ - struct util_dynarray vma_list[BRW_MEMZONE_COUNT]; -}; - -struct bo_export { - /** File descriptor associated with a handle export. */ - int drm_fd; - - /** GEM handle in drm_fd */ - uint32_t gem_handle; - - struct list_head link; -}; - -struct brw_bufmgr { - uint32_t refcount; - - struct list_head link; - - int fd; - - mtx_t lock; - - /** Array of lists of cached gem objects of power-of-two sizes */ - struct bo_cache_bucket cache_bucket[14 * 4]; - int num_buckets; - time_t time; - - struct hash_table *name_table; - struct hash_table *handle_table; - - struct util_vma_heap vma_allocator[BRW_MEMZONE_COUNT]; - - bool has_llc:1; - bool has_mmap_wc:1; - bool has_mmap_offset:1; - bool bo_reuse:1; - - uint64_t initial_kflags; -}; - -static mtx_t global_bufmgr_list_mutex = _MTX_INITIALIZER_NP; -static struct list_head global_bufmgr_list = { - .next = &global_bufmgr_list, - .prev = &global_bufmgr_list, -}; - -static int bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode, - uint32_t stride); - -static void bo_free(struct brw_bo *bo); - -static uint64_t vma_alloc(struct brw_bufmgr *bufmgr, - enum brw_memory_zone memzone, - uint64_t size, uint64_t alignment); - -static struct brw_bo * -hash_find_bo(struct hash_table *ht, unsigned int key) -{ - struct hash_entry *entry = _mesa_hash_table_search(ht, &key); - return entry ? (struct brw_bo *) entry->data : NULL; -} - -static uint64_t -bo_tile_size(struct brw_bufmgr *bufmgr, uint64_t size, uint32_t tiling) -{ - if (tiling == I915_TILING_NONE) - return size; - - /* 965+ just need multiples of page size for tiling */ - return ALIGN(size, PAGE_SIZE); -} - -/* - * Round a given pitch up to the minimum required for X tiling on a - * given chip. We use 512 as the minimum to allow for a later tiling - * change. - */ -static uint32_t -bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling) -{ - unsigned long tile_width; - - /* If untiled, then just align it so that we can do rendering - * to it with the 3D engine. - */ - if (tiling == I915_TILING_NONE) - return ALIGN(pitch, 64); - - if (tiling == I915_TILING_X) - tile_width = 512; - else - tile_width = 128; - - /* 965 is flexible */ - return ALIGN(pitch, tile_width); -} - -/** - * This function finds the correct bucket fit for the input size. - * The function works with O(1) complexity when the requested size - * was queried instead of iterating the size through all the buckets. 
- */ -static struct bo_cache_bucket * -bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size) -{ - /* Calculating the pages and rounding up to the page size. */ - const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; - - /* Row Bucket sizes clz((x-1) | 3) Row Column - * in pages stride size - * 0: 1 2 3 4 -> 30 30 30 30 4 1 - * 1: 5 6 7 8 -> 29 29 29 29 4 1 - * 2: 10 12 14 16 -> 28 28 28 28 8 2 - * 3: 20 24 28 32 -> 27 27 27 27 16 4 - */ - const unsigned row = 30 - __builtin_clz((pages - 1) | 3); - const unsigned row_max_pages = 4 << row; - - /* The '& ~2' is the special case for row 1. In row 1, max pages / - * 2 is 2, but the previous row maximum is zero (because there is - * no previous row). All row maximum sizes are power of 2, so that - * is the only case where that bit will be set. - */ - const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2; - int col_size_log2 = row - 1; - col_size_log2 += (col_size_log2 < 0); - - const unsigned col = (pages - prev_row_max_pages + - ((1 << col_size_log2) - 1)) >> col_size_log2; - - /* Calculating the index based on the row and column. */ - const unsigned index = (row * 4) + (col - 1); - - return (index < bufmgr->num_buckets) ? - &bufmgr->cache_bucket[index] : NULL; -} - -static enum brw_memory_zone -memzone_for_address(uint64_t address) -{ - const uint64_t _4GB = 1ull << 32; - - if (address >= _4GB) - return BRW_MEMZONE_OTHER; - - return BRW_MEMZONE_LOW_4G; -} - -static uint64_t -bucket_vma_alloc(struct brw_bufmgr *bufmgr, - struct bo_cache_bucket *bucket, - enum brw_memory_zone memzone) -{ - struct util_dynarray *vma_list = &bucket->vma_list[memzone]; - struct vma_bucket_node *node; - - if (vma_list->size == 0) { - /* This bucket allocator is out of space - allocate a new block of - * memory for 64 blocks from a larger allocator (either a larger - * bucket or util_vma). - * - * We align the address to the node size (64 blocks) so that - * bucket_vma_free can easily compute the starting address of this - * block by rounding any address we return down to the node size. - * - * Set the first bit used, and return the start address. - */ - uint64_t node_size = 64ull * bucket->size; - node = util_dynarray_grow(vma_list, struct vma_bucket_node, 1); - - if (unlikely(!node)) - return 0ull; - - uint64_t addr = vma_alloc(bufmgr, memzone, node_size, node_size); - node->start_address = intel_48b_address(addr); - node->bitmap = ~1ull; - return node->start_address; - } - - /* Pick any bit from any node - they're all the right size and free. */ - node = util_dynarray_top_ptr(vma_list, struct vma_bucket_node); - int bit = ffsll(node->bitmap) - 1; - assert(bit >= 0 && bit <= 63); - - /* Reserve the memory by clearing the bit. */ - assert((node->bitmap & (1ull << bit)) != 0ull); - node->bitmap &= ~(1ull << bit); - - uint64_t addr = node->start_address + bit * bucket->size; - - /* If this node is now completely full, remove it from the free list. */ - if (node->bitmap == 0ull) { - (void) util_dynarray_pop(vma_list, struct vma_bucket_node); - } - - return addr; -} - -static void -bucket_vma_free(struct bo_cache_bucket *bucket, uint64_t address) -{ - enum brw_memory_zone memzone = memzone_for_address(address); - struct util_dynarray *vma_list = &bucket->vma_list[memzone]; - const uint64_t node_bytes = 64ull * bucket->size; - struct vma_bucket_node *node = NULL; - - /* bucket_vma_alloc allocates 64 blocks at a time, and aligns it to - * that 64 block size. So, we can round down to get the starting address. 
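The clz() row/column arithmetic above rewards a worked check. A standalone sketch that recomputes bucket indices from a page count using the same math (GCC-style __builtin_clz, as the original relies on; hypothetical function name):

   #include <stdio.h>

   /* Recompute the bucket index from a page count with the same math as
    * bucket_for_size() above: rows of four buckets, row N covering up to
    * 4 << N pages, columns stepping by max(row size / 4, 1) pages.
    */
   static unsigned
   bucket_index_for_pages(unsigned pages)
   {
      const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
      const unsigned prev_row_max_pages = ((4 << row) / 2) & ~2;
      int col_size_log2 = row - 1;
      col_size_log2 += (col_size_log2 < 0);
      const unsigned col = (pages - prev_row_max_pages +
                            ((1u << col_size_log2) - 1)) >> col_size_log2;
      return row * 4 + (col - 1);
   }

   int
   main(void)
   {
      /* Prints indices 0..7 for 1..8 pages, matching the table above. */
      for (unsigned pages = 1; pages <= 8; pages++)
         printf("%u pages -> bucket %u\n", pages,
                bucket_index_for_pages(pages));
      return 0;
   }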
- */ - uint64_t start = (address / node_bytes) * node_bytes; - - /* Dividing the offset from start by bucket size gives us the bit index. */ - int bit = (address - start) / bucket->size; - - assert(start + bit * bucket->size == address); - - util_dynarray_foreach(vma_list, struct vma_bucket_node, cur) { - if (cur->start_address == start) { - node = cur; - break; - } - } - - if (!node) { - /* No node - the whole group of 64 blocks must have been in-use. */ - node = util_dynarray_grow(vma_list, struct vma_bucket_node, 1); - - if (unlikely(!node)) - return; /* bogus, leaks some GPU VMA, but nothing we can do... */ - - node->start_address = start; - node->bitmap = 0ull; - } - - /* Set the bit to return the memory. */ - assert((node->bitmap & (1ull << bit)) == 0ull); - node->bitmap |= 1ull << bit; - - /* The block might be entirely free now, and if so, we could return it - * to the larger allocator. But we may as well hang on to it, in case - * we get more allocations at this block size. - */ -} - -static struct bo_cache_bucket * -get_bucket_allocator(struct brw_bufmgr *bufmgr, uint64_t size) -{ - /* Skip using the bucket allocator for very large sizes, as it allocates - * 64 of them and this can balloon rather quickly. - */ - if (size > 1024 * PAGE_SIZE) - return NULL; - - struct bo_cache_bucket *bucket = bucket_for_size(bufmgr, size); - - if (bucket && bucket->size == size) - return bucket; - - return NULL; -} - -/** - * Allocate a section of virtual memory for a buffer, assigning an address. - * - * This uses either the bucket allocator for the given size, or the large - * object allocator (util_vma). - */ -static uint64_t -vma_alloc(struct brw_bufmgr *bufmgr, - enum brw_memory_zone memzone, - uint64_t size, - uint64_t alignment) -{ - /* Without softpin support, we let the kernel assign addresses. */ - assert(brw_using_softpin(bufmgr)); - - alignment = ALIGN(alignment, PAGE_SIZE); - - struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size); - uint64_t addr; - - if (bucket) { - addr = bucket_vma_alloc(bufmgr, bucket, memzone); - } else { - addr = util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size, - alignment); - } - - assert((addr >> 48ull) == 0); - assert((addr % alignment) == 0); - - return intel_canonical_address(addr); -} - -/** - * Free a virtual memory area, allowing the address to be reused. - */ -static void -vma_free(struct brw_bufmgr *bufmgr, - uint64_t address, - uint64_t size) -{ - assert(brw_using_softpin(bufmgr)); - - /* Un-canonicalize the address. 
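Each vma_bucket_node above is effectively a 64-slot bitmap allocator. A self-contained sketch of the two bit operations involved (allocation via ffsll, as the original uses, and freeing by setting the bit back; hypothetical names, GNU ffsll assumed):

   #include <assert.h>
   #include <stdint.h>
   #include <strings.h>

   /* Hypothetical 64-slot bitmap allocator mirroring vma_bucket_node:
    * bit N set means block N (at base + N * block_size) is free.
    */
   struct bitmap_node { uint64_t base, bitmap; };

   static uint64_t
   node_alloc(struct bitmap_node *n, uint64_t block_size)
   {
      assert(n->bitmap != 0);           /* caller checks for a free slot */
      int bit = ffsll(n->bitmap) - 1;   /* lowest free block             */
      n->bitmap &= ~(1ull << bit);      /* claim it                      */
      return n->base + (uint64_t)bit * block_size;
   }

   static void
   node_free(struct bitmap_node *n, uint64_t addr, uint64_t block_size)
   {
      int bit = (addr - n->base) / block_size;
      assert((n->bitmap & (1ull << bit)) == 0);   /* must be in use */
      n->bitmap |= 1ull << bit;
   }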
*/ - address = intel_48b_address(address); - - if (address == 0ull) - return; - - struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size); - - if (bucket) { - bucket_vma_free(bucket, address); - } else { - enum brw_memory_zone memzone = memzone_for_address(address); - util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size); - } -} - -int -brw_bo_busy(struct brw_bo *bo) -{ - struct brw_bufmgr *bufmgr = bo->bufmgr; - struct drm_i915_gem_busy busy = { .handle = bo->gem_handle }; - - int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); - if (ret == 0) { - bo->idle = !busy.busy; - return busy.busy; - } - return false; -} - -int -brw_bo_madvise(struct brw_bo *bo, int state) -{ - struct drm_i915_gem_madvise madv = { - .handle = bo->gem_handle, - .madv = state, - .retained = 1, - }; - - drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); - - return madv.retained; -} - -/* drop the oldest entries that have been purged by the kernel */ -static void -brw_bo_cache_purge_bucket(struct brw_bufmgr *bufmgr, - struct bo_cache_bucket *bucket) -{ - list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) { - if (brw_bo_madvise(bo, I915_MADV_DONTNEED)) - break; - - list_del(&bo->head); - bo_free(bo); - } -} - -static struct brw_bo * -bo_calloc(void) -{ - struct brw_bo *bo = calloc(1, sizeof(*bo)); - if (!bo) - return NULL; - - list_inithead(&bo->exports); - - return bo; -} - -static struct brw_bo * -bo_alloc_internal(struct brw_bufmgr *bufmgr, - const char *name, - uint64_t size, - enum brw_memory_zone memzone, - unsigned flags, - uint32_t tiling_mode, - uint32_t stride) -{ - struct brw_bo *bo; - int ret; - struct bo_cache_bucket *bucket; - bool alloc_from_cache; - uint64_t bo_size; - bool busy = false; - bool zeroed = false; - - if (flags & BO_ALLOC_BUSY) - busy = true; - - if (flags & BO_ALLOC_ZEROED) - zeroed = true; - - /* BUSY does doesn't really jive with ZEROED as we have to wait for it to - * be idle before we can memset. Just disallow that combination. - */ - assert(!(busy && zeroed)); - - /* Round the allocated size up to a power of two number of pages. */ - bucket = bucket_for_size(bufmgr, size); - - /* If we don't have caching at this size, don't actually round the - * allocation up. - */ - if (bucket == NULL) { - unsigned int page_size = getpagesize(); - bo_size = size == 0 ? page_size : ALIGN(size, page_size); - } else { - bo_size = bucket->size; - } - assert(bo_size); - - mtx_lock(&bufmgr->lock); - /* Get a buffer out of the cache if available */ -retry: - alloc_from_cache = false; - if (bucket != NULL && !list_is_empty(&bucket->head)) { - if (busy && !zeroed) { - /* Allocate new render-target BOs from the tail (MRU) - * of the list, as it will likely be hot in the GPU - * cache and in the aperture for us. If the caller - * asked us to zero the buffer, we don't want this - * because we are going to mmap it. - */ - bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head); - list_del(&bo->head); - alloc_from_cache = true; - } else { - /* For non-render-target BOs (where we're probably - * going to map it first thing in order to fill it - * with data), check if the last BO in the cache is - * unbusy, and only reuse in that case. Otherwise, - * allocating a new buffer is probably faster than - * waiting for the GPU to finish. 
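The canonicalization mentioned around vma_alloc()/vma_free() is ordinary sign extension: with a 48-bit GPU address space, bits 63:48 must replicate bit 47. A sketch of both directions under that assumption (hypothetical function names):

   #include <stdint.h>

   /* Sign-extend a 48-bit GPU address so bits 63:48 copy bit 47, which
    * is what intel_canonical_address() is used for above; assumes a
    * 48-bit virtual address space.
    */
   static uint64_t
   to_canonical(uint64_t addr)
   {
      return (uint64_t)((int64_t)(addr << 16) >> 16);
   }

   /* Drop the sign extension again ("un-canonicalize", as in vma_free). */
   static uint64_t
   to_48b(uint64_t addr)
   {
      return addr & ((1ull << 48) - 1);
   }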
- */ - bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head); - if (!brw_bo_busy(bo)) { - alloc_from_cache = true; - list_del(&bo->head); - } - } - - if (alloc_from_cache) { - assert(list_is_empty(&bo->exports)); - if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) { - bo_free(bo); - brw_bo_cache_purge_bucket(bufmgr, bucket); - goto retry; - } - - if (bo_set_tiling_internal(bo, tiling_mode, stride)) { - bo_free(bo); - goto retry; - } - - if (zeroed) { - void *map = brw_bo_map(NULL, bo, MAP_WRITE | MAP_RAW); - if (!map) { - bo_free(bo); - goto retry; - } - memset(map, 0, bo_size); - } - } - } - - if (alloc_from_cache) { - /* If the cache BO isn't in the right memory zone, free the old - * memory and assign it a new address. - */ - if ((bo->kflags & EXEC_OBJECT_PINNED) && - memzone != memzone_for_address(bo->gtt_offset)) { - vma_free(bufmgr, bo->gtt_offset, bo->size); - bo->gtt_offset = 0ull; - } - } else { - bo = bo_calloc(); - if (!bo) - goto err; - - bo->size = bo_size; - bo->idle = true; - - struct drm_i915_gem_create create = { .size = bo_size }; - - /* All new BOs we get from the kernel are zeroed, so we don't need to - * worry about that here. - */ - ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create); - if (ret != 0) { - free(bo); - goto err; - } - - bo->gem_handle = create.handle; - - bo->bufmgr = bufmgr; - - bo->tiling_mode = I915_TILING_NONE; - bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; - bo->stride = 0; - - if (bo_set_tiling_internal(bo, tiling_mode, stride)) - goto err_free; - - /* Calling set_domain() will allocate pages for the BO outside of the - * struct mutex lock in the kernel, which is more efficient than waiting - * to create them during the first execbuf that uses the BO. - */ - struct drm_i915_gem_set_domain sd = { - .handle = bo->gem_handle, - .read_domains = I915_GEM_DOMAIN_CPU, - .write_domain = 0, - }; - - if (drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0) - goto err_free; - } - - bo->name = name; - p_atomic_set(&bo->refcount, 1); - bo->reusable = true; - bo->cache_coherent = bufmgr->has_llc; - bo->index = -1; - bo->kflags = bufmgr->initial_kflags; - - if ((bo->kflags & EXEC_OBJECT_PINNED) && bo->gtt_offset == 0ull) { - bo->gtt_offset = vma_alloc(bufmgr, memzone, bo->size, 1); - - if (bo->gtt_offset == 0ull) - goto err_free; - } - - mtx_unlock(&bufmgr->lock); - - DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle, bo->name, - (unsigned long long) size); - - return bo; - -err_free: - bo_free(bo); -err: - mtx_unlock(&bufmgr->lock); - return NULL; -} - -struct brw_bo * -brw_bo_alloc(struct brw_bufmgr *bufmgr, - const char *name, uint64_t size, - enum brw_memory_zone memzone) -{ - return bo_alloc_internal(bufmgr, name, size, memzone, - 0, I915_TILING_NONE, 0); -} - -struct brw_bo * -brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr, const char *name, - uint64_t size, enum brw_memory_zone memzone, - uint32_t tiling_mode, uint32_t pitch, - unsigned flags) -{ - return bo_alloc_internal(bufmgr, name, size, memzone, - flags, tiling_mode, pitch); -} - -struct brw_bo * -brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, const char *name, - int x, int y, int cpp, enum brw_memory_zone memzone, - uint32_t tiling, uint32_t *pitch, unsigned flags) -{ - uint64_t size; - uint32_t stride; - unsigned long aligned_y, height_alignment; - - /* If we're tiled, our allocations are in 8 or 32-row blocks, - * so failure to align our height means that we won't allocate - * enough pages. 
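Stripped of the cache, the fresh-allocation path above is two ioctls. A minimal sketch of that sequence against the i915 uAPI used throughout this file (error handling trimmed; the set-domain call pre-faults pages, as the comment above explains; repo-relative drm-uapi include assumed):

   #include <stdint.h>
   #include <sys/ioctl.h>
   #include "drm-uapi/i915_drm.h"

   /* Create a GEM BO of `size` bytes and pre-fault its pages, mirroring
    * the GEM_CREATE + SET_DOMAIN sequence in bo_alloc_internal().
    * Returns the GEM handle, or 0 on failure.
    */
   static uint32_t
   gem_create_prefaulted(int fd, uint64_t size)
   {
      struct drm_i915_gem_create create = { .size = size };
      if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create) != 0)
         return 0;

      struct drm_i915_gem_set_domain sd = {
         .handle = create.handle,
         .read_domains = I915_GEM_DOMAIN_CPU,
      };
      if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0)
         return 0;   /* a real caller would GEM_CLOSE the handle here */

      return create.handle;
   }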
- * - * If we're untiled, we still have to align to 2 rows high - * because the data port accesses 2x2 blocks even if the - * bottom row isn't to be rendered, so failure to align means - * we could walk off the end of the GTT and fault. This is - * documented on 965, and may be the case on older chipsets - * too so we try to be careful. - */ - aligned_y = y; - height_alignment = 2; - - if (tiling == I915_TILING_X) - height_alignment = 8; - else if (tiling == I915_TILING_Y) - height_alignment = 32; - aligned_y = ALIGN(y, height_alignment); - - stride = x * cpp; - stride = bo_tile_pitch(bufmgr, stride, tiling); - size = stride * aligned_y; - size = bo_tile_size(bufmgr, size, tiling); - *pitch = stride; - - if (tiling == I915_TILING_NONE) - stride = 0; - - return bo_alloc_internal(bufmgr, name, size, memzone, - flags, tiling, stride); -} - -/** - * Returns a brw_bo wrapping the given buffer object handle. - * - * This can be used when one application needs to pass a buffer object - * to another. - */ -struct brw_bo * -brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr, - const char *name, unsigned int handle) -{ - struct brw_bo *bo; - - /* At the moment most applications only have a few named bo. - * For instance, in a DRI client only the render buffers passed - * between X and the client are named. And since X returns the - * alternating names for the front/back buffer a linear search - * provides a sufficiently fast match. - */ - mtx_lock(&bufmgr->lock); - bo = hash_find_bo(bufmgr->name_table, handle); - if (bo) { - brw_bo_reference(bo); - goto out; - } - - struct drm_gem_open open_arg = { .name = handle }; - int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg); - if (ret != 0) { - DBG("Couldn't reference %s handle 0x%08x: %s\n", - name, handle, strerror(errno)); - bo = NULL; - goto out; - } - /* Now see if someone has used a prime handle to get this - * object from the kernel before by looking through the list - * again for a matching gem_handle - */ - bo = hash_find_bo(bufmgr->handle_table, open_arg.handle); - if (bo) { - brw_bo_reference(bo); - goto out; - } - - bo = bo_calloc(); - if (!bo) - goto out; - - p_atomic_set(&bo->refcount, 1); - - bo->size = open_arg.size; - bo->gtt_offset = 0; - bo->bufmgr = bufmgr; - bo->gem_handle = open_arg.handle; - bo->name = name; - bo->global_name = handle; - bo->reusable = false; - bo->external = true; - bo->kflags = bufmgr->initial_kflags; - - if (bo->kflags & EXEC_OBJECT_PINNED) - bo->gtt_offset = vma_alloc(bufmgr, BRW_MEMZONE_OTHER, bo->size, 1); - - _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo); - _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo); - - struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle }; - ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); - if (ret != 0) - goto err_unref; - - bo->tiling_mode = get_tiling.tiling_mode; - bo->swizzle_mode = get_tiling.swizzle_mode; - /* XXX stride is unknown */ - DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name); - -out: - mtx_unlock(&bufmgr->lock); - return bo; - -err_unref: - bo_free(bo); - mtx_unlock(&bufmgr->lock); - return NULL; -} - -static void -bo_free(struct brw_bo *bo) -{ - struct brw_bufmgr *bufmgr = bo->bufmgr; - - if (bo->map_cpu) { - VG_NOACCESS(bo->map_cpu, bo->size); - drm_munmap(bo->map_cpu, bo->size); - } - if (bo->map_wc) { - VG_NOACCESS(bo->map_wc, bo->size); - drm_munmap(bo->map_wc, bo->size); - } - if (bo->map_gtt) { - VG_NOACCESS(bo->map_gtt, bo->size); - drm_munmap(bo->map_gtt, 
bo->size); - } - - if (bo->external) { - struct hash_entry *entry; - - if (bo->global_name) { - entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name); - _mesa_hash_table_remove(bufmgr->name_table, entry); - } - - entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle); - _mesa_hash_table_remove(bufmgr->handle_table, entry); - } else { - assert(list_is_empty(&bo->exports)); - } - - /* Close this object */ - struct drm_gem_close close = { .handle = bo->gem_handle }; - int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close); - if (ret != 0) { - DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", - bo->gem_handle, bo->name, strerror(errno)); - } - - if (bo->kflags & EXEC_OBJECT_PINNED) - vma_free(bo->bufmgr, bo->gtt_offset, bo->size); - - free(bo); -} - -/** Frees all cached buffers significantly older than @time. */ -static void -cleanup_bo_cache(struct brw_bufmgr *bufmgr, time_t time) -{ - int i; - - if (bufmgr->time == time) - return; - - for (i = 0; i < bufmgr->num_buckets; i++) { - struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i]; - - list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) { - if (time - bo->free_time <= 1) - break; - - list_del(&bo->head); - - bo_free(bo); - } - } - - bufmgr->time = time; -} - -static void -bo_unreference_final(struct brw_bo *bo, time_t time) -{ - struct brw_bufmgr *bufmgr = bo->bufmgr; - struct bo_cache_bucket *bucket; - - DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name); - - list_for_each_entry_safe(struct bo_export, export, &bo->exports, link) { - struct drm_gem_close close = { .handle = export->gem_handle }; - intel_ioctl(export->drm_fd, DRM_IOCTL_GEM_CLOSE, &close); - - list_del(&export->link); - free(export); - } - - bucket = bucket_for_size(bufmgr, bo->size); - /* Put the buffer into our internal cache for reuse if we can. */ - if (bufmgr->bo_reuse && bo->reusable && bucket != NULL && - brw_bo_madvise(bo, I915_MADV_DONTNEED)) { - bo->free_time = time; - - bo->name = NULL; - - list_addtail(&bo->head, &bucket->head); - } else { - bo_free(bo); - } -} - -void -brw_bo_unreference(struct brw_bo *bo) -{ - if (bo == NULL) - return; - - assert(p_atomic_read(&bo->refcount) > 0); - - if (atomic_add_unless(&bo->refcount, -1, 1)) { - struct brw_bufmgr *bufmgr = bo->bufmgr; - struct timespec time; - - clock_gettime(CLOCK_MONOTONIC, &time); - - mtx_lock(&bufmgr->lock); - - if (p_atomic_dec_zero(&bo->refcount)) { - bo_unreference_final(bo, time.tv_sec); - cleanup_bo_cache(bufmgr, time.tv_sec); - } - - mtx_unlock(&bufmgr->lock); - } -} - -static void -bo_wait_with_stall_warning(struct brw_context *brw, - struct brw_bo *bo, - const char *action) -{ - bool busy = brw && brw->perf_debug && !bo->idle; - double elapsed = unlikely(busy) ? 
-get_time() : 0.0; - - brw_bo_wait_rendering(bo); - - if (unlikely(busy)) { - elapsed += get_time(); - if (elapsed > 1e-5) /* 0.01ms */ - perf_debug("%s a busy \"%s\" BO stalled and took %.03f ms.\n", - action, bo->name, elapsed * 1000); - } -} - -static void -print_flags(unsigned flags) -{ - if (flags & MAP_READ) - DBG("READ "); - if (flags & MAP_WRITE) - DBG("WRITE "); - if (flags & MAP_ASYNC) - DBG("ASYNC "); - if (flags & MAP_PERSISTENT) - DBG("PERSISTENT "); - if (flags & MAP_COHERENT) - DBG("COHERENT "); - if (flags & MAP_RAW) - DBG("RAW "); - DBG("\n"); -} - -static void * -brw_bo_gem_mmap_legacy(struct brw_context *brw, struct brw_bo *bo, bool wc) -{ - struct brw_bufmgr *bufmgr = bo->bufmgr; - - struct drm_i915_gem_mmap mmap_arg = { - .handle = bo->gem_handle, - .size = bo->size, - .flags = wc ? I915_MMAP_WC : 0, - }; - - int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg); - if (ret != 0) { - DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", - __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); - return NULL; - } - void *map = (void *) (uintptr_t) mmap_arg.addr_ptr; - - return map; -} - -static void * -brw_bo_gem_mmap_offset(struct brw_context *brw, struct brw_bo *bo, bool wc) -{ - struct brw_bufmgr *bufmgr = bo->bufmgr; - - struct drm_i915_gem_mmap_offset mmap_arg = { - .handle = bo->gem_handle, - .flags = wc ? I915_MMAP_OFFSET_WC : I915_MMAP_OFFSET_WB, - }; - - /* Get the fake offset back */ - int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mmap_arg); - if (ret != 0) { - DBG("%s:%d: Error preparing buffer %d (%s): %s .\n", - __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); - return NULL; - } - - /* And map it */ - void *map = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, - bufmgr->fd, mmap_arg.offset); - if (map == MAP_FAILED) { - DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", - __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); - return NULL; - } - - return map; -} - -static void * -brw_bo_gem_mmap(struct brw_context *brw, struct brw_bo *bo, bool wc) -{ - struct brw_bufmgr *bufmgr = bo->bufmgr; - - if (bufmgr->has_mmap_offset) - return brw_bo_gem_mmap_offset(brw, bo, wc); - else - return brw_bo_gem_mmap_legacy(brw, bo, wc); -} - -static void * -brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags) -{ - /* We disallow CPU maps for writing to non-coherent buffers, as the - * CPU map can become invalidated when a batch is flushed out, which - * can happen at unpredictable times. You should use WC maps instead. - */ - assert(bo->cache_coherent || !(flags & MAP_WRITE)); - - if (!bo->map_cpu) { - DBG("brw_bo_map_cpu: %d (%s)\n", bo->gem_handle, bo->name); - - void *map = brw_bo_gem_mmap(brw, bo, false); - VG_DEFINED(map, bo->size); - - if (p_atomic_cmpxchg(&bo->map_cpu, NULL, map)) { - VG_NOACCESS(map, bo->size); - drm_munmap(map, bo->size); - } - } - assert(bo->map_cpu); - - DBG("brw_bo_map_cpu: %d (%s) -> %p, ", bo->gem_handle, bo->name, - bo->map_cpu); - print_flags(flags); - - if (!(flags & MAP_ASYNC)) { - bo_wait_with_stall_warning(brw, bo, "CPU mapping"); - } - - if (!bo->cache_coherent && !bo->bufmgr->has_llc) { - /* If we're reusing an existing CPU mapping, the CPU caches may - * contain stale data from the last time we read from that mapping. - * (With the BO cache, it might even be data from a previous buffer!) - * Even if it's a brand new mapping, the kernel may have zeroed the - * buffer via CPU writes. 
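 * (Illustrative sketch, not code from this file: on x86 an invalidate
 *  of this kind boils down to a CLFLUSH loop over the mapping plus a
 *  fence, roughly
 *
 *     for (char *p = map; p < (char *)map + size; p += 64)
 *        __builtin_ia32_clflush(p);   /* flush one cache line */
 *     __builtin_ia32_mfence();        /* order against later reads */
 *
 *  where map/size stand for the mapping and its length, and the
 *  64-byte cache line is an assumption; the intel_invalidate_range()
 *  call below is expected to expand to a sequence of this shape.)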
-       *
-       * We need to invalidate those cachelines so that we see the latest
-       * contents, and so long as we only read from the CPU mmap we do not
-       * need to write those cachelines back afterwards.
-       *
-       * On LLC, the empirical evidence suggests that writes from the GPU
-       * that bypass the LLC (i.e. for scanout) do *invalidate* the CPU
-       * cachelines. (Other reads, such as the display engine, bypass the
-       * LLC entirely, requiring us to keep dirty pixels for the scanout
-       * out of any cache.)
-       */
-      intel_invalidate_range(bo->map_cpu, bo->size);
-   }
-
-   return bo->map_cpu;
-}
-
-static void *
-brw_bo_map_wc(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   if (!bufmgr->has_mmap_wc)
-      return NULL;
-
-   if (!bo->map_wc) {
-      DBG("brw_bo_map_wc: %d (%s)\n", bo->gem_handle, bo->name);
-      void *map = brw_bo_gem_mmap(brw, bo, true);
-      VG_DEFINED(map, bo->size);
-
-      if (p_atomic_cmpxchg(&bo->map_wc, NULL, map)) {
-         VG_NOACCESS(map, bo->size);
-         drm_munmap(map, bo->size);
-      }
-   }
-   assert(bo->map_wc);
-
-   DBG("brw_bo_map_wc: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->map_wc);
-   print_flags(flags);
-
-   if (!(flags & MAP_ASYNC)) {
-      bo_wait_with_stall_warning(brw, bo, "WC mapping");
-   }
-
-   return bo->map_wc;
-}
-
-/**
- * Perform an uncached mapping via the GTT.
- *
- * Write access through the GTT is not quite fully coherent. On low power
- * systems especially, like modern Atoms, we can observe reads from RAM before
- * the write via GTT has landed. A write memory barrier that flushes the Write
- * Combining Buffer (i.e. sfence/mfence) is not sufficient to order the later
- * read after the write as the GTT write suffers a small delay through the GTT
- * indirection. The kernel uses an uncached mmio read to ensure the GTT write
- * is ordered with reads (either by the GPU, WB or WC) and unconditionally
- * flushes prior to execbuf submission. However, if we are not informing the
- * kernel about our GTT writes, it will not flush before earlier access, such
- * as when using the cmdparser. Similarly, we need to be careful if we should
- * ever issue a CPU read immediately following a GTT write.
- *
- * Telling the kernel about write access also has one more important
- * side-effect. Upon receiving notification about the write, it cancels any
- * scanout buffering for FBC/PSR and friends. Later FBC/PSR is then flushed by
- * either SW_FINISH or DIRTYFB. The presumption is that we never write to the
- * actual scanout via a mmapping, only to a backbuffer, and so all the FBC/PSR
- * tracking is handled on the buffer exchange instead.
- */
-static void *
-brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   /* Get a mapping of the buffer if we haven't before. */
-   if (bo->map_gtt == NULL) {
-      DBG("bo_map_gtt: mmap %d (%s)\n", bo->gem_handle, bo->name);
-
-      struct drm_i915_gem_mmap_gtt mmap_arg = { .handle = bo->gem_handle };
-
-      /* Get the fake offset back... */
-      int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
-      if (ret != 0) {
-         DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
-             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
-         return NULL;
-      }
-
-      /* and mmap it. */
-      void *map = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
-                           MAP_SHARED, bufmgr->fd, mmap_arg.offset);
-      if (map == MAP_FAILED) {
-         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
-             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
-         return NULL;
-      }
-
-      /* We don't need to use VALGRIND_MALLOCLIKE_BLOCK because Valgrind will
-       * already intercept this mmap call. However, for consistency between
-       * all the mmap paths, we mark the pointer as defined now and mark it
-       * as inaccessible afterwards.
-       */
-      VG_DEFINED(map, bo->size);
-
-      if (p_atomic_cmpxchg(&bo->map_gtt, NULL, map)) {
-         VG_NOACCESS(map, bo->size);
-         drm_munmap(map, bo->size);
-      }
-   }
-   assert(bo->map_gtt);
-
-   DBG("bo_map_gtt: %d (%s) -> %p, ", bo->gem_handle, bo->name, bo->map_gtt);
-   print_flags(flags);
-
-   if (!(flags & MAP_ASYNC)) {
-      bo_wait_with_stall_warning(brw, bo, "GTT mapping");
-   }
-
-   return bo->map_gtt;
-}
-
-static bool
-can_map_cpu(struct brw_bo *bo, unsigned flags)
-{
-   if (bo->cache_coherent)
-      return true;
-
-   /* Even if the buffer itself is not cache-coherent (such as a scanout), on
-    * an LLC platform reads are always coherent (as they are performed via
-    * the central system agent). It is just the writes that we need to take
-    * special care with, to ensure they land in main memory and do not stick
-    * in the CPU cache.
-    */
-   if (!(flags & MAP_WRITE) && bo->bufmgr->has_llc)
-      return true;
-
-   /* If PERSISTENT or COHERENT are set, the mmapping needs to remain valid
-    * across batch flushes where the kernel will change cache domains of the
-    * bo, invalidating continued access to the CPU mmap on non-LLC devices.
-    *
-    * Similarly, ASYNC typically means that the buffer will be accessed via
-    * both the CPU and the GPU simultaneously. Batches may be executed that
-    * use the BO even while it is mapped. While OpenGL technically disallows
-    * most drawing while non-persistent mappings are active, we may still use
-    * the GPU for blits or other operations, causing batches to happen at
-    * inconvenient times.
-    */
-   if (flags & (MAP_PERSISTENT | MAP_COHERENT | MAP_ASYNC))
-      return false;
-
-   return !(flags & MAP_WRITE);
-}
-
-void *
-brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
-{
-   if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
-      return brw_bo_map_gtt(brw, bo, flags);
-
-   void *map;
-
-   if (can_map_cpu(bo, flags))
-      map = brw_bo_map_cpu(brw, bo, flags);
-   else
-      map = brw_bo_map_wc(brw, bo, flags);
-
-   /* Allow the attempt to fail by falling back to the GTT where necessary.
-    *
-    * Not every buffer can be mmapped directly using the CPU (or WC), for
-    * example buffers that wrap stolen memory or are imported from other
-    * devices. For those, we have little choice but to use a GTT mmapping.
-    * However, if we use a slow GTT mmapping for reads where we expected fast
-    * access, that order of magnitude difference in throughput will be clearly
-    * expressed by angry users.
-    *
-    * We skip MAP_RAW because we want to avoid map_gtt's fence detiling.
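 * (Hedged usage sketch, with dst and size as hypothetical locals
 *  rather than anything defined in this driver:
 *
 *     void *ptr = brw_bo_map(brw, bo, MAP_READ);
 *     if (ptr) {
 *        memcpy(dst, ptr, size);   /* read back the buffer contents */
 *        brw_bo_unmap(bo);
 *     }
 *
 *  The CPU, WC or GTT path is chosen internally as described above,
 *  so callers never pick a mapping type themselves.)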
-    */
-   if (!map && !(flags & MAP_RAW)) {
-      if (brw) {
-         perf_debug("Fallback GTT mapping for %s with access flags %x\n",
-                    bo->name, flags);
-      }
-      map = brw_bo_map_gtt(brw, bo, flags);
-   }
-
-   return map;
-}
-
-int
-brw_bo_subdata(struct brw_bo *bo, uint64_t offset,
-               uint64_t size, const void *data)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   struct drm_i915_gem_pwrite pwrite = {
-      .handle = bo->gem_handle,
-      .offset = offset,
-      .size = size,
-      .data_ptr = (uint64_t) (uintptr_t) data,
-   };
-
-   int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
-   if (ret != 0) {
-      ret = -errno;
-      DBG("%s:%d: Error writing data to buffer %d: "
-          "(%"PRIu64" %"PRIu64") %s .\n",
-          __FILE__, __LINE__, bo->gem_handle, offset, size, strerror(errno));
-   }
-
-   return ret;
-}
-
-/** Waits for all GPU rendering with the object to have completed. */
-void
-brw_bo_wait_rendering(struct brw_bo *bo)
-{
-   /* We require a kernel recent enough for WAIT_IOCTL support.
-    * See brw_init_bufmgr()
-    */
-   brw_bo_wait(bo, -1);
-}
-
-/**
- * Waits on a BO for the given amount of time.
- *
- * @bo: buffer object to wait for
- * @timeout_ns: amount of time to wait in nanoseconds.
- *   If the value is less than 0, an infinite wait will occur.
- *
- * Returns 0 if the wait was successful, i.e. the last batch referencing the
- * object has completed within the allotted time. Otherwise some negative
- * return value describes the error. Of particular interest is -ETIME when
- * the wait has failed to yield the desired result.
- *
- * Similar to brw_bo_wait_rendering except a timeout parameter allows
- * the operation to give up after a certain amount of time. Another subtle
- * difference is that the internal locking semantics differ (this variant
- * does not hold the lock for the duration of the wait). This makes the wait
- * subject to a larger userspace race window.
- *
- * The implementation shall wait until the object is no longer actively
- * referenced within a batch buffer at the time of the call. The wait will
- * not guarantee that the buffer is re-issued via another thread, or a
- * flinked handle. Userspace must make sure this race does not occur if such
- * precision is important.
- *
- * Note that some kernels have broken the infinite wait for negative values
- * promise; upgrade to the latest stable kernels if this is the case.
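 * (Illustrative caller, an assumption rather than code from this
 *  driver: wait up to 1 ms and treat -ETIME as "still busy":
 *
 *     int ret = brw_bo_wait(bo, 1000000);
 *     if (ret == -ETIME) {
 *        // object is still referenced by a batch; back off and retry
 *     } else if (ret < 0) {
 *        // a real error from the wait ioctl
 *     }
 *
 *  A negative timeout_ns, as passed by brw_bo_wait_rendering(),
 *  blocks until the object is idle.)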
- */
-int
-brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   /* If we know it's idle, don't bother with the kernel round trip */
-   if (bo->idle && !bo->external)
-      return 0;
-
-   struct drm_i915_gem_wait wait = {
-      .bo_handle = bo->gem_handle,
-      .timeout_ns = timeout_ns,
-   };
-   int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
-   if (ret != 0)
-      return -errno;
-
-   bo->idle = true;
-
-   return ret;
-}
-
-void
-brw_bufmgr_unref(struct brw_bufmgr *bufmgr)
-{
-   mtx_lock(&global_bufmgr_list_mutex);
-   if (p_atomic_dec_zero(&bufmgr->refcount)) {
-      list_del(&bufmgr->link);
-   } else {
-      bufmgr = NULL;
-   }
-   mtx_unlock(&global_bufmgr_list_mutex);
-
-   if (!bufmgr)
-      return;
-
-   mtx_destroy(&bufmgr->lock);
-
-   /* Free any cached buffer objects we were going to reuse */
-   for (int i = 0; i < bufmgr->num_buckets; i++) {
-      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
-
-      list_for_each_entry_safe(struct brw_bo, bo, &bucket->head, head) {
-         list_del(&bo->head);
-
-         bo_free(bo);
-      }
-
-      if (brw_using_softpin(bufmgr)) {
-         for (int z = 0; z < BRW_MEMZONE_COUNT; z++) {
-            util_dynarray_fini(&bucket->vma_list[z]);
-         }
-      }
-   }
-
-   _mesa_hash_table_destroy(bufmgr->name_table, NULL);
-   _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
-
-   if (brw_using_softpin(bufmgr)) {
-      for (int z = 0; z < BRW_MEMZONE_COUNT; z++) {
-         util_vma_heap_finish(&bufmgr->vma_allocator[z]);
-      }
-   }
-
-   close(bufmgr->fd);
-   bufmgr->fd = -1;
-
-   free(bufmgr);
-}
-
-static int
-bo_set_tiling_internal(struct brw_bo *bo, uint32_t tiling_mode,
-                       uint32_t stride)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-   struct drm_i915_gem_set_tiling set_tiling;
-   int ret;
-
-   if (bo->global_name == 0 &&
-       tiling_mode == bo->tiling_mode && stride == bo->stride)
-      return 0;
-
-   memset(&set_tiling, 0, sizeof(set_tiling));
-   do {
-      /* set_tiling is slightly broken and overwrites the
-       * input on the error path, so we have to open code
-       * drmIoctl.
-       */
-      set_tiling.handle = bo->gem_handle;
-      set_tiling.tiling_mode = tiling_mode;
-      set_tiling.stride = stride;
-
-      ret = ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
-   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
-   if (ret == -1)
-      return -errno;
-
-   bo->tiling_mode = set_tiling.tiling_mode;
-   bo->swizzle_mode = set_tiling.swizzle_mode;
-   bo->stride = set_tiling.stride;
-   return 0;
-}
-
-int
-brw_bo_get_tiling(struct brw_bo *bo, uint32_t *tiling_mode,
-                  uint32_t *swizzle_mode)
-{
-   *tiling_mode = bo->tiling_mode;
-   *swizzle_mode = bo->swizzle_mode;
-   return 0;
-}
-
-static struct brw_bo *
-brw_bo_gem_create_from_prime_internal(struct brw_bufmgr *bufmgr, int prime_fd,
-                                      int tiling_mode, uint32_t stride)
-{
-   uint32_t handle;
-   struct brw_bo *bo;
-
-   mtx_lock(&bufmgr->lock);
-   int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
-   if (ret) {
-      DBG("create_from_prime: failed to obtain handle from fd: %s\n",
-          strerror(errno));
-      mtx_unlock(&bufmgr->lock);
-      return NULL;
-   }
-
-   /*
-    * See if the kernel has already returned this buffer to us. Just as
-    * for named buffers, we must not create two bos pointing at the same
-    * kernel object.
-    */
-   bo = hash_find_bo(bufmgr->handle_table, handle);
-   if (bo) {
-      brw_bo_reference(bo);
-      goto out;
-   }
-
-   bo = bo_calloc();
-   if (!bo)
-      goto out;
-
-   p_atomic_set(&bo->refcount, 1);
-
-   /* Determine the size of the bo. The fd-to-handle ioctl really should
-    * return the size, but it doesn't. If we have kernel 3.12 or
-    * later, we can lseek on the prime fd to get the size. Older
-    * kernels will just fail, in which case we fall back to the
-    * provided (estimated or guessed) size. */
-   ret = lseek(prime_fd, 0, SEEK_END);
-   if (ret != -1)
-      bo->size = ret;
-
-   bo->bufmgr = bufmgr;
-
-   bo->gem_handle = handle;
-   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
-
-   bo->name = "prime";
-   bo->reusable = false;
-   bo->external = true;
-   bo->kflags = bufmgr->initial_kflags;
-
-   if (bo->kflags & EXEC_OBJECT_PINNED) {
-      assert(bo->size > 0);
-      bo->gtt_offset = vma_alloc(bufmgr, BRW_MEMZONE_OTHER, bo->size, 1);
-   }
-
-   if (tiling_mode < 0) {
-      struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
-      if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling))
-         goto err;
-
-      bo->tiling_mode = get_tiling.tiling_mode;
-      bo->swizzle_mode = get_tiling.swizzle_mode;
-      /* XXX stride is unknown */
-   } else {
-      bo_set_tiling_internal(bo, tiling_mode, stride);
-   }
-
-out:
-   mtx_unlock(&bufmgr->lock);
-   return bo;
-
-err:
-   bo_free(bo);
-   mtx_unlock(&bufmgr->lock);
-   return NULL;
-}
-
-struct brw_bo *
-brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, int prime_fd)
-{
-   return brw_bo_gem_create_from_prime_internal(bufmgr, prime_fd, -1, 0);
-}
-
-struct brw_bo *
-brw_bo_gem_create_from_prime_tiled(struct brw_bufmgr *bufmgr, int prime_fd,
-                                   uint32_t tiling_mode, uint32_t stride)
-{
-   assert(tiling_mode == I915_TILING_NONE ||
-          tiling_mode == I915_TILING_X ||
-          tiling_mode == I915_TILING_Y);
-
-   return brw_bo_gem_create_from_prime_internal(bufmgr, prime_fd,
-                                                tiling_mode, stride);
-}
-
-static void
-brw_bo_make_external(struct brw_bo *bo)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   if (!bo->external) {
-      mtx_lock(&bufmgr->lock);
-      if (!bo->external) {
-         _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
-         bo->external = true;
-      }
-      mtx_unlock(&bufmgr->lock);
-   }
-}
-
-int
-brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   brw_bo_make_external(bo);
-
-   if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
-                          DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
-      return -errno;
-
-   bo->reusable = false;
-
-   return 0;
-}
-
-uint32_t
-brw_bo_export_gem_handle(struct brw_bo *bo)
-{
-   brw_bo_make_external(bo);
-
-   return bo->gem_handle;
-}
-
-int
-brw_bo_flink(struct brw_bo *bo, uint32_t *name)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   if (!bo->global_name) {
-      struct drm_gem_flink flink = { .handle = bo->gem_handle };
-
-      if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
-         return -errno;
-
-      brw_bo_make_external(bo);
-      mtx_lock(&bufmgr->lock);
-      if (!bo->global_name) {
-         bo->global_name = flink.name;
-         _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
-      }
-      mtx_unlock(&bufmgr->lock);
-
-      bo->reusable = false;
-   }
-
-   *name = bo->global_name;
-   return 0;
-}
-
-int
-brw_bo_export_gem_handle_for_device(struct brw_bo *bo, int drm_fd,
-                                    uint32_t *out_handle)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   /* Only add the new GEM handle to the list of exports if it belongs to a
-    * different GEM device. Otherwise we might close the same buffer multiple
-    * times.
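 * (Sketch of the failure mode being avoided, assuming two exports of
 *  the same bo to one foreign fd:
 *
 *     uint32_t h1, h2;
 *     brw_bo_export_gem_handle_for_device(bo, fd, &h1);
 *     brw_bo_export_gem_handle_for_device(bo, fd, &h2);
 *     assert(h1 == h2);   // deduplicated through bo->exports
 *
 *  The kernel returns the same GEM handle for both imports, so
 *  without the bo->exports bookkeeping the teardown path would
 *  GEM_CLOSE that one handle twice.)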
- */ - int ret = os_same_file_description(drm_fd, bufmgr->fd); - WARN_ONCE(ret < 0, - "Kernel has no file descriptor comparison support: %s\n", - strerror(errno)); - if (ret == 0) { - *out_handle = brw_bo_export_gem_handle(bo); - return 0; - } - - struct bo_export *export = calloc(1, sizeof(*export)); - if (!export) - return -ENOMEM; - - export->drm_fd = drm_fd; - - int dmabuf_fd = -1; - int err = brw_bo_gem_export_to_prime(bo, &dmabuf_fd); - if (err) { - free(export); - return err; - } - - mtx_lock(&bufmgr->lock); - err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle); - close(dmabuf_fd); - if (err) { - mtx_unlock(&bufmgr->lock); - free(export); - return err; - } - - bool found = false; - list_for_each_entry(struct bo_export, iter, &bo->exports, link) { - if (iter->drm_fd != drm_fd) - continue; - /* Here we assume that for a given DRM fd, we'll always get back the - * same GEM handle for a given buffer. - */ - assert(iter->gem_handle == export->gem_handle); - free(export); - export = iter; - found = true; - break; - } - if (!found) - list_addtail(&export->link, &bo->exports); - - mtx_unlock(&bufmgr->lock); - - *out_handle = export->gem_handle; - - return 0; -} - -static void -add_bucket(struct brw_bufmgr *bufmgr, int size) -{ - unsigned int i = bufmgr->num_buckets; - - assert(i < ARRAY_SIZE(bufmgr->cache_bucket)); - - list_inithead(&bufmgr->cache_bucket[i].head); - if (brw_using_softpin(bufmgr)) { - for (int z = 0; z < BRW_MEMZONE_COUNT; z++) - util_dynarray_init(&bufmgr->cache_bucket[i].vma_list[z], NULL); - } - bufmgr->cache_bucket[i].size = size; - bufmgr->num_buckets++; - - assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]); - assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]); - assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]); -} - -static void -init_cache_buckets(struct brw_bufmgr *bufmgr) -{ - uint64_t size, cache_max_size = 64 * 1024 * 1024; - - /* OK, so power of two buckets was too wasteful of memory. - * Give 3 other sizes between each power of two, to hopefully - * cover things accurately enough. (The alternative is - * probably to just go for exact matching of sizes, and assume - * that for things like composited window resize the tiled - * width/height alignment and rounding of sizes to pages will - * get us useful cache hit rates anyway) - */ - add_bucket(bufmgr, PAGE_SIZE); - add_bucket(bufmgr, PAGE_SIZE * 2); - add_bucket(bufmgr, PAGE_SIZE * 3); - - /* Initialize the linked lists for BO reuse cache. 
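 * (Worked example, derived from the code below rather than stated in
 *  it: with 4 KiB pages the buckets come out as 4, 8, 12, 16, 20, 24,
 *  28, 32, 40, 48, 56, 64 KiB and so on, i.e. each power of two plus
 *  three evenly spaced steps toward the next, up to cache_max_size
 *  (64 MiB). bucket_for_size(), defined earlier in this file, should
 *  then behave like the obvious linear scan:
 *
 *     for (int i = 0; i < bufmgr->num_buckets; i++)
 *        if (bufmgr->cache_bucket[i].size >= size)
 *           return &bufmgr->cache_bucket[i];
 *     return NULL;
 *  )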
    */
-   for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) {
-      add_bucket(bufmgr, size);
-
-      add_bucket(bufmgr, size + size * 1 / 4);
-      add_bucket(bufmgr, size + size * 2 / 4);
-      add_bucket(bufmgr, size + size * 3 / 4);
-   }
-}
-
-uint32_t
-brw_create_hw_context(struct brw_bufmgr *bufmgr)
-{
-   struct drm_i915_gem_context_create create = { };
-   int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
-   if (ret != 0) {
-      DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", strerror(errno));
-      return 0;
-   }
-
-   return create.ctx_id;
-}
-
-int
-brw_hw_context_set_priority(struct brw_bufmgr *bufmgr,
-                            uint32_t ctx_id,
-                            int priority)
-{
-   struct drm_i915_gem_context_param p = {
-      .ctx_id = ctx_id,
-      .param = I915_CONTEXT_PARAM_PRIORITY,
-      .value = priority,
-   };
-   int err;
-
-   err = 0;
-   if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
-      err = -errno;
-
-   return err;
-}
-
-void
-brw_destroy_hw_context(struct brw_bufmgr *bufmgr, uint32_t ctx_id)
-{
-   struct drm_i915_gem_context_destroy d = { .ctx_id = ctx_id };
-
-   if (ctx_id != 0 &&
-       drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) {
-      fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
-              strerror(errno));
-   }
-}
-
-int
-brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset, uint64_t *result)
-{
-   struct drm_i915_reg_read reg_read = { .offset = offset };
-   int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
-
-   *result = reg_read.val;
-   return ret;
-}
-
-static int
-gem_param(int fd, int name)
-{
-   int v = -1; /* No param uses (yet) the sign bit, reserve it for errors */
-
-   struct drm_i915_getparam gp = { .param = name, .value = &v };
-   if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
-      return -1;
-
-   return v;
-}
-
-static int
-gem_context_getparam(int fd, uint32_t context, uint64_t param, uint64_t *value)
-{
-   struct drm_i915_gem_context_param gp = {
-      .ctx_id = context,
-      .param = param,
-   };
-
-   if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &gp))
-      return -1;
-
-   *value = gp.value;
-
-   return 0;
-}
-
-bool
-brw_using_softpin(struct brw_bufmgr *bufmgr)
-{
-   return bufmgr->initial_kflags & EXEC_OBJECT_PINNED;
-}
-
-static struct brw_bufmgr *
-brw_bufmgr_ref(struct brw_bufmgr *bufmgr)
-{
-   p_atomic_inc(&bufmgr->refcount);
-   return bufmgr;
-}
-
-/**
- * Initializes the GEM buffer manager, which uses the kernel to allocate,
- * map, and manage buffer objects.
- *
- * \param fd File descriptor of the opened DRM device.
- */
-static struct brw_bufmgr *
-brw_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
-{
-   struct brw_bufmgr *bufmgr;
-
-   bufmgr = calloc(1, sizeof(*bufmgr));
-   if (bufmgr == NULL)
-      return NULL;
-
-   /* Handles to buffer objects belong to the device fd and are not
-    * reference counted by the kernel. If the same fd is used by
-    * multiple parties (threads sharing the same screen bufmgr, or
-    * even worse the same device fd passed to multiple libraries)
-    * ownership of those handles is shared by those independent parties.
-    *
-    * Don't do this! Ensure that each library/bufmgr has its own device
-    * fd so that its namespace does not clash with another.
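 * (Clarifying note, based on general GEM semantics rather than on
 *  anything in this file: handles are scoped to the open file
 *  description, so the os_dupfd_cloexec() below does not create a
 *  fresh handle namespace; it only decouples the bufmgr's fd lifetime
 *  from the caller's. A truly independent namespace requires a
 *  separate open() of the device node, which is what the warning
 *  above is asking callers to guarantee.)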
-    */
-   bufmgr->fd = os_dupfd_cloexec(fd);
-   if (bufmgr->fd < 0) {
-      free(bufmgr);
-      return NULL;
-   }
-
-   p_atomic_set(&bufmgr->refcount, 1);
-
-   if (mtx_init(&bufmgr->lock, mtx_plain) != 0) {
-      close(bufmgr->fd);
-      free(bufmgr);
-      return NULL;
-   }
-
-   uint64_t gtt_size;
-   if (gem_context_getparam(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE, &gtt_size))
-      gtt_size = 0;
-
-   bufmgr->has_llc = devinfo->has_llc;
-   bufmgr->has_mmap_wc = gem_param(fd, I915_PARAM_MMAP_VERSION) > 0;
-   bufmgr->bo_reuse = bo_reuse;
-   bufmgr->has_mmap_offset = gem_param(fd, I915_PARAM_MMAP_GTT_VERSION) >= 4;
-
-   const uint64_t _4GB = 4ull << 30;
-
-   /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */
-   const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE;
-
-   if (devinfo->ver >= 8 && gtt_size > _4GB) {
-      bufmgr->initial_kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
-
-      /* Allocate VMA in userspace if we have softpin and full PPGTT. */
-      if (gem_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN) > 0 &&
-          gem_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) > 1) {
-         bufmgr->initial_kflags |= EXEC_OBJECT_PINNED;
-
-         util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_LOW_4G],
-                            PAGE_SIZE, _4GB_minus_1);
-
-         /* Leave the last 4GB out of the high vma range, so that no state
-          * base address + size can overflow 48 bits.
-          */
-         util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_OTHER],
-                            1 * _4GB, gtt_size - 2 * _4GB);
-      } else if (devinfo->ver >= 10) {
-         /* Softpin landed in 4.5, but GVT used an aliasing PPGTT until
-          * kernel commit 6b3816d69628becb7ff35978aa0751798b4a940a in
-          * 4.14. Gfx10+ GVT hasn't landed yet, so it's not actually a
-          * problem - but extending this requirement back to earlier gens
-          * might actually mean requiring 4.14.
-          */
-         fprintf(stderr, "i965 requires softpin (Kernel 4.5) on Gfx10+.\n");
-         close(bufmgr->fd);
-         free(bufmgr);
-         return NULL;
-      }
-   }
-
-   init_cache_buckets(bufmgr);
-
-   bufmgr->name_table =
-      _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
-   bufmgr->handle_table =
-      _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
-
-   return bufmgr;
-}
-
-struct brw_bufmgr *
-brw_bufmgr_get_for_fd(struct intel_device_info *devinfo, int fd, bool bo_reuse)
-{
-   struct stat st;
-
-   if (fstat(fd, &st))
-      return NULL;
-
-   struct brw_bufmgr *bufmgr = NULL;
-
-   mtx_lock(&global_bufmgr_list_mutex);
-   list_for_each_entry(struct brw_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
-      struct stat iter_st;
-      if (fstat(iter_bufmgr->fd, &iter_st))
-         continue;
-
-      if (st.st_rdev == iter_st.st_rdev) {
-         assert(iter_bufmgr->bo_reuse == bo_reuse);
-         bufmgr = brw_bufmgr_ref(iter_bufmgr);
-         goto unlock;
-      }
-   }
-
-   bufmgr = brw_bufmgr_create(devinfo, fd, bo_reuse);
-   if (bufmgr)
-      list_addtail(&bufmgr->link, &global_bufmgr_list);
-
- unlock:
-   mtx_unlock(&global_bufmgr_list_mutex);
-
-   return bufmgr;
-}
-
-int
-brw_bufmgr_get_fd(struct brw_bufmgr *bufmgr)
-{
-   return bufmgr->fd;
-}
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h b/src/mesa/drivers/dri/i965/brw_bufmgr.h
deleted file mode 100644
index cb272a9..0000000
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- * Copyright © 2008-2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and
to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * - */ - -/** - * @file brw_bufmgr.h - * - * Public definitions of Intel-specific bufmgr functions. - */ - -#ifndef BRW_BUFMGR_H -#define BRW_BUFMGR_H - -#include -#include -#include -#include - -#include "c11/threads.h" -#include "util/u_atomic.h" -#include "util/list.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -struct intel_device_info; -struct brw_context; - -/** - * Memory zones. When allocating a buffer, you can request that it is - * placed into a specific region of the virtual address space (PPGTT). - * - * Most buffers can go anywhere (BRW_MEMZONE_OTHER). Some buffers are - * accessed via an offset from a base address. STATE_BASE_ADDRESS has - * a maximum 4GB size for each region, so we need to restrict those - * buffers to be within 4GB of the base. Each memory zone corresponds - * to a particular base address. - * - * Currently, i965 partitions the address space into two regions: - * - * - Low 4GB - * - Full 48-bit address space - * - * Eventually, we hope to carve out 4GB of VMA for each base address. - */ -enum brw_memory_zone { - BRW_MEMZONE_LOW_4G, - BRW_MEMZONE_OTHER, - - /* Shaders - Instruction State Base Address */ - BRW_MEMZONE_SHADER = BRW_MEMZONE_LOW_4G, - - /* Scratch - General State Base Address */ - BRW_MEMZONE_SCRATCH = BRW_MEMZONE_LOW_4G, - - /* Surface State Base Address */ - BRW_MEMZONE_SURFACE = BRW_MEMZONE_LOW_4G, - - /* Dynamic State Base Address */ - BRW_MEMZONE_DYNAMIC = BRW_MEMZONE_LOW_4G, -}; - -#define BRW_MEMZONE_COUNT (BRW_MEMZONE_OTHER + 1) - -struct brw_bo { - /** - * Size in bytes of the buffer object. - * - * The size may be larger than the size originally requested for the - * allocation, such as being aligned to page size. - */ - uint64_t size; - - /** Buffer manager context associated with this buffer object */ - struct brw_bufmgr *bufmgr; - - /** The GEM handle for this buffer object. */ - uint32_t gem_handle; - - /** - * Offset of the buffer inside the Graphics Translation Table. - * - * This is effectively our GPU address for the buffer and we use it - * as our base for all state pointers into the buffer. However, since the - * kernel may be forced to move it around during the course of the - * buffer's lifetime, we can only know where the buffer was on the last - * execbuf. We presume, and are usually right, that the buffer will not - * move and so we use that last offset for the next batch and by doing - * so we can avoid having the kernel perform a relocation fixup pass as - * our pointers inside the batch will be using the correct base offset. 
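 * (Sketch of how this presumption was consumed on the pre-softpin
 *  relocation path; the field usage follows the i915 uAPI, while the
 *  surrounding variables are hypothetical:
 *
 *     struct drm_i915_gem_relocation_entry reloc = {
 *        .target_handle = target->gem_handle,
 *        .offset = batch_offset,        // where the pointer lives
 *        .delta = delta,                // offset inside the target
 *        .presumed_offset = target->gtt_offset,
 *     };
 *     // the batch dword is written as target->gtt_offset + delta;
 *     // the kernel only rewrites it if presumed_offset proved wrong.
 *  )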
- *
- * Since we do use it as a base address for the next batch of pointers,
- * the kernel treats our offset as a request, and if possible will
- * arrange for the buffer to be placed at that address (trying to balance
- * the cost of buffer migration versus the cost of performing
- * relocations). Furthermore, by specifying EXEC_OBJECT_PINNED we can force
- * the kernel to place the buffer at our chosen offset, or to report a
- * failure if we specified a conflicting offset.
- *
- * Note the GTT may be either per context, or shared globally across the
- * system. On a shared system, our buffers have to contend for address
- * space with both aperture mappings and framebuffers and so are more
- * likely to be moved. On a full ppGTT system, each batch exists in its
- * own GTT, and so each buffer may have its own offset within each
- * context.
- */
- uint64_t gtt_offset;
-
- /**
- * The validation list index for this buffer, or -1 when not in a batch.
- * Note that a single buffer may be in multiple batches (contexts), and
- * this is a global field, which refers to the last batch using the BO.
- * It should not be considered authoritative, but can be used to avoid a
- * linear walk of the validation list in the common case by guessing that
- * exec_bos[bo->index] == bo and confirming whether that's the case.
- */
- unsigned index;
-
- /**
- * Boolean of whether the GPU is definitely not accessing the buffer.
- *
- * This is only valid when reusable, since non-reusable
- * buffers are those that have been shared with other
- * processes, so we don't know their state.
- */
- bool idle;
-
- int refcount;
- const char *name;
-
- uint64_t kflags;
-
- /**
- * Kernel-assigned global name for this object.
- *
- * The list contains both flink-named and prime-fd'd objects.
- */
- unsigned int global_name;
-
- /**
- * Current tiling mode
- */
- uint32_t tiling_mode;
- uint32_t swizzle_mode;
- uint32_t stride;
-
- time_t free_time;
-
- /** Mapped address for the buffer, saved across map/unmap cycles */
- void *map_cpu;
- /** GTT virtual address for the buffer, saved across map/unmap cycles */
- void *map_gtt;
- /** WC CPU address for the buffer, saved across map/unmap cycles */
- void *map_wc;
-
- /** BO cache list */
- struct list_head head;
-
- /**
- * List of GEM handle exports of this buffer (bo_export).
- *
- * Hold bufmgr->lock when using this list.
- */
- struct list_head exports;
-
- /**
- * Boolean of whether this buffer can be re-used
- */
- bool reusable;
-
- /**
- * Boolean of whether this buffer has been shared with an external client.
- */
- bool external;
-
- /**
- * Boolean of whether this buffer is cache coherent
- */
- bool cache_coherent;
-};
-
-#define BO_ALLOC_BUSY (1<<0)
-#define BO_ALLOC_ZEROED (1<<1)
-
-/**
- * Allocate a buffer object.
- *
- * Buffer objects are not necessarily initially mapped into CPU virtual
- * address space or graphics device aperture. They must be mapped
- * using brw_bo_map() to be used by the CPU.
- */
-struct brw_bo *brw_bo_alloc(struct brw_bufmgr *bufmgr, const char *name,
- uint64_t size, enum brw_memory_zone memzone);
-
-/**
- * Allocate a tiled buffer object.
- *
- * Alignment for tiled objects is set automatically; the 'flags'
- * argument provides a hint about how the object will be used initially.
- * - * Valid tiling formats are: - * I915_TILING_NONE - * I915_TILING_X - * I915_TILING_Y - */ -struct brw_bo *brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr, - const char *name, - uint64_t size, - enum brw_memory_zone memzone, - uint32_t tiling_mode, - uint32_t pitch, - unsigned flags); - -/** - * Allocate a tiled buffer object. - * - * Alignment for tiled objects is set automatically; the 'flags' - * argument provides a hint about how the object will be used initially. - * - * Valid tiling formats are: - * I915_TILING_NONE - * I915_TILING_X - * I915_TILING_Y - * - * Note the tiling format may be rejected; callers should check the - * 'tiling_mode' field on return, as well as the pitch value, which - * may have been rounded up to accommodate for tiling restrictions. - */ -struct brw_bo *brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, - const char *name, - int x, int y, int cpp, - enum brw_memory_zone memzone, - uint32_t tiling_mode, - uint32_t *pitch, - unsigned flags); - -/** Takes a reference on a buffer object */ -static inline void -brw_bo_reference(struct brw_bo *bo) -{ - p_atomic_inc(&bo->refcount); -} - -/** - * Releases a reference on a buffer object, freeing the data if - * no references remain. - */ -void brw_bo_unreference(struct brw_bo *bo); - -/* Must match MapBufferRange interface (for convenience) */ -#define MAP_READ GL_MAP_READ_BIT -#define MAP_WRITE GL_MAP_WRITE_BIT -#define MAP_ASYNC GL_MAP_UNSYNCHRONIZED_BIT -#define MAP_PERSISTENT GL_MAP_PERSISTENT_BIT -#define MAP_COHERENT GL_MAP_COHERENT_BIT -/* internal */ -#define MAP_INTERNAL_MASK (0xffu << 24) -#define MAP_RAW (0x01 << 24) - -/** - * Maps the buffer into userspace. - * - * This function will block waiting for any existing execution on the - * buffer to complete, first. The resulting mapping is returned. - */ -MUST_CHECK void *brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags); - -/** - * Reduces the refcount on the userspace mapping of the buffer - * object. - */ -static inline int brw_bo_unmap(UNUSED struct brw_bo *bo) { return 0; } - -/** Write data into an object. */ -int brw_bo_subdata(struct brw_bo *bo, uint64_t offset, - uint64_t size, const void *data); -/** - * Waits for rendering to an object by the GPU to have completed. - * - * This is not required for any access to the BO by bo_map, - * bo_subdata, etc. It is merely a way for the driver to implement - * glFinish. - */ -void brw_bo_wait_rendering(struct brw_bo *bo); - -/** - * Unref a buffer manager instance. - */ -void brw_bufmgr_unref(struct brw_bufmgr *bufmgr); - -/** - * Get the current tiling (and resulting swizzling) mode for the bo. - * - * \param buf Buffer to get tiling mode for - * \param tiling_mode returned tiling mode - * \param swizzle_mode returned swizzling mode - */ -int brw_bo_get_tiling(struct brw_bo *bo, uint32_t *tiling_mode, - uint32_t *swizzle_mode); - -/** - * Create a visible name for a buffer which can be used by other apps - * - * \param buf Buffer to create a name for - * \param name Returned name - */ -int brw_bo_flink(struct brw_bo *bo, uint32_t *name); - -/** - * Returns 1 if mapping the buffer for write could cause the process - * to block, due to the object being active in the GPU. - */ -int brw_bo_busy(struct brw_bo *bo); - -/** - * Specify the volatility of the buffer. - * \param bo Buffer to create a name for - * \param madv The purgeable status - * - * Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be - * reclaimed under memory pressure. 
If you subsequently require the buffer, - * then you must pass I915_MADV_WILLNEED to mark the buffer as required. - * - * Returns 1 if the buffer was retained, or 0 if it was discarded whilst - * marked as I915_MADV_DONTNEED. - */ -int brw_bo_madvise(struct brw_bo *bo, int madv); - -struct brw_bufmgr *brw_bufmgr_get_for_fd(struct intel_device_info *devinfo, - int fd, bool bo_reuse); - -struct brw_bo *brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr, - const char *name, - unsigned int handle); - -int brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns); - -uint32_t brw_create_hw_context(struct brw_bufmgr *bufmgr); - -int brw_hw_context_set_priority(struct brw_bufmgr *bufmgr, - uint32_t ctx_id, - int priority); - -void brw_destroy_hw_context(struct brw_bufmgr *bufmgr, uint32_t ctx_id); - -int brw_bufmgr_get_fd(struct brw_bufmgr *bufmgr); - -int brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd); -struct brw_bo *brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, - int prime_fd); -struct brw_bo *brw_bo_gem_create_from_prime_tiled(struct brw_bufmgr *bufmgr, - int prime_fd, - uint32_t tiling_mode, - uint32_t stride); - -uint32_t brw_bo_export_gem_handle(struct brw_bo *bo); - -/** - * Exports a bo as a GEM handle into a given DRM file descriptor - * \param bo Buffer to export - * \param drm_fd File descriptor where the new handle is created - * \param out_handle Pointer to store the new handle - * - * Returns 0 if the buffer was successfully exported, a non zero error code - * otherwise. - */ -int brw_bo_export_gem_handle_for_device(struct brw_bo *bo, int drm_fd, - uint32_t *out_handle); - -int brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset, - uint64_t *result); - -bool brw_using_softpin(struct brw_bufmgr *bufmgr); - -/** @{ */ - -#if defined(__cplusplus) -} -#endif -#endif /* BRW_BUFMGR_H */ diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c deleted file mode 100644 index 3fcc31a..0000000 --- a/src/mesa/drivers/dri/i965/brw_clear.c +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * Copyright 2009, 2012 Intel Corporation. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "main/mtypes.h" -#include "main/condrender.h" -#include "swrast/swrast.h" -#include "drivers/common/meta.h" - -#include "brw_batch.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" - -#include "brw_context.h" -#include "brw_blorp.h" -#include "brw_defines.h" - -#define FILE_DEBUG_FLAG DEBUG_BLIT - -static const char *buffer_names[] = { - [BUFFER_FRONT_LEFT] = "front", - [BUFFER_BACK_LEFT] = "back", - [BUFFER_FRONT_RIGHT] = "front right", - [BUFFER_BACK_RIGHT] = "back right", - [BUFFER_DEPTH] = "depth", - [BUFFER_STENCIL] = "stencil", - [BUFFER_ACCUM] = "accum", - [BUFFER_COLOR0] = "color0", - [BUFFER_COLOR1] = "color1", - [BUFFER_COLOR2] = "color2", - [BUFFER_COLOR3] = "color3", - [BUFFER_COLOR4] = "color4", - [BUFFER_COLOR5] = "color5", - [BUFFER_COLOR6] = "color6", - [BUFFER_COLOR7] = "color7", -}; - -static void -debug_mask(const char *name, GLbitfield mask) -{ - GLuint i; - - if (INTEL_DEBUG(DEBUG_BLIT)) { - DBG("%s clear:", name); - for (i = 0; i < BUFFER_COUNT; i++) { - if (mask & (1 << i)) - DBG(" %s", buffer_names[i]); - } - DBG("\n"); - } -} - -/** - * Returns true if the scissor is a noop (cuts out nothing). - */ -static bool -noop_scissor(struct gl_framebuffer *fb) -{ - return fb->_Xmin <= 0 && - fb->_Ymin <= 0 && - fb->_Xmax >= fb->Width && - fb->_Ymax >= fb->Height; -} - -/** - * Implements fast depth clears on gfx6+. - * - * Fast clears basically work by setting a flag in each of the subspans - * represented in the HiZ buffer that says "When you need the depth values for - * this subspan, it's the hardware's current clear value." Then later rendering - * can just use the static clear value instead of referencing memory. - * - * The tricky part of the implementation is that you have to have the clear - * value that was used on the depth buffer in place for all further rendering, - * at least until a resolve to the real depth buffer happens. - */ -static bool -brw_fast_clear_depth(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_framebuffer *fb = ctx->DrawBuffer; - struct brw_renderbuffer *depth_irb = - brw_get_renderbuffer(fb, BUFFER_DEPTH); - struct brw_mipmap_tree *mt = depth_irb->mt; - struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH]; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR)) - return false; - - if (devinfo->ver < 6) - return false; - - if (!brw_renderbuffer_has_hiz(depth_irb)) - return false; - - /* We only handle full buffer clears -- otherwise you'd have to track whether - * a previous clear had happened at a different clear value and resolve it - * first. - */ - if ((ctx->Scissor.EnableFlags & 1) && !noop_scissor(fb)) { - perf_debug("Failed to fast clear %dx%d depth because of scissors. " - "Possible 5%% performance win if avoided.\n", - mt->surf.logical_level0_px.width, - mt->surf.logical_level0_px.height); - return false; - } - - switch (mt->format) { - case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: - case MESA_FORMAT_Z24_UNORM_S8_UINT: - /* From the Sandy Bridge PRM, volume 2 part 1, page 314: - * - * "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be - * enabled (the legacy method of clearing must be performed): - * - * - If the depth buffer format is D32_FLOAT_S8X24_UINT or - * D24_UNORM_S8_UINT. 
- */ - return false; - - case MESA_FORMAT_Z_UNORM16: - /* From the Sandy Bridge PRM, volume 2 part 1, page 314: - * - * "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be - * enabled (the legacy method of clearing must be performed): - * - * - DevSNB{W/A}]: When depth buffer format is D16_UNORM and the - * width of the map (LOD0) is not multiple of 16, fast clear - * optimization must be disabled. - */ - if (devinfo->ver == 6 && - (minify(mt->surf.phys_level0_sa.width, - depth_irb->mt_level - mt->first_level) % 16) != 0) - return false; - break; - - default: - break; - } - - /* Quantize the clear value to what can be stored in the actual depth - * buffer. This makes the following check more accurate because it now - * checks if the actual depth bits will match. It also prevents us from - * getting a too-accurate depth value during depth testing or when sampling - * with HiZ enabled. - */ - float clear_value = - mt->format == MESA_FORMAT_Z_FLOAT32 ? ctx->Depth.Clear : - _mesa_lroundeven(ctx->Depth.Clear * fb->_DepthMax) / (float)(fb->_DepthMax); - - const uint32_t num_layers = depth_att->Layered ? depth_irb->layer_count : 1; - - /* If we're clearing to a new clear value, then we need to resolve any clear - * flags out of the HiZ buffer into the real depth buffer. - */ - if (mt->fast_clear_color.f32[0] != clear_value) { - for (uint32_t level = mt->first_level; level <= mt->last_level; level++) { - if (!brw_miptree_level_has_hiz(mt, level)) - continue; - - const unsigned level_layers = brw_get_num_logical_layers(mt, level); - - for (uint32_t layer = 0; layer < level_layers; layer++) { - if (level == depth_irb->mt_level && - layer >= depth_irb->mt_layer && - layer < depth_irb->mt_layer + num_layers) { - /* We're going to clear this layer anyway. Leave it alone. */ - continue; - } - - enum isl_aux_state aux_state = - brw_miptree_get_aux_state(mt, level, layer); - - if (aux_state != ISL_AUX_STATE_CLEAR && - aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR) { - /* This slice doesn't have any fast-cleared bits. */ - continue; - } - - /* If we got here, then the level may have fast-clear bits that - * use the old clear value. We need to do a depth resolve to get - * rid of their use of the clear value before we can change it. - * Fortunately, few applications ever change their depth clear - * value so this shouldn't happen often. - */ - brw_hiz_exec(brw, mt, level, layer, 1, ISL_AUX_OP_FULL_RESOLVE); - brw_miptree_set_aux_state(brw, mt, level, layer, 1, - ISL_AUX_STATE_RESOLVED); - } - } - - const union isl_color_value clear_color = { .f32 = {clear_value, } }; - brw_miptree_set_clear_color(brw, mt, clear_color); - } - - for (unsigned a = 0; a < num_layers; a++) { - enum isl_aux_state aux_state = - brw_miptree_get_aux_state(mt, depth_irb->mt_level, - depth_irb->mt_layer + a); - - if (aux_state != ISL_AUX_STATE_CLEAR) { - brw_hiz_exec(brw, mt, depth_irb->mt_level, - depth_irb->mt_layer + a, 1, - ISL_AUX_OP_FAST_CLEAR); - } - } - - brw_miptree_set_aux_state(brw, mt, depth_irb->mt_level, - depth_irb->mt_layer, num_layers, - ISL_AUX_STATE_CLEAR); - return true; -} - -/** - * Called by ctx->Driver.Clear. 
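 * (Worked example of the quantization in brw_fast_clear_depth()
 *  above, assuming a 24-bit depth buffer, i.e. _DepthMax = 0xffffff,
 *  and a requested clear value of 0.3:
 *
 *     _mesa_lroundeven(0.3 * 16777215) = 5033164
 *     5033164 / 16777215.0f           ~= 0.29999997
 *
 *  so 0.29999997f, not 0.3f, is the value that the fast-clear color
 *  and all later depth testing must agree on.)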
- */ -static void -brw_clear(struct gl_context *ctx, GLbitfield mask) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_framebuffer *fb = ctx->DrawBuffer; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - bool partial_clear = ctx->Scissor.EnableFlags && !noop_scissor(fb); - - if (!_mesa_check_conditional_render(ctx)) - return; - - if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) { - brw->front_buffer_dirty = true; - } - - brw_prepare_render(brw); - brw_workaround_depthstencil_alignment(brw, partial_clear ? 0 : mask); - - if (mask & BUFFER_BIT_DEPTH) { - if (brw_fast_clear_depth(ctx)) { - DBG("fast clear: depth\n"); - mask &= ~BUFFER_BIT_DEPTH; - } - } - - if (mask & BUFFER_BITS_COLOR) { - brw_blorp_clear_color(brw, fb, mask, partial_clear, - ctx->Color.sRGBEnabled); - debug_mask("blorp color", mask & BUFFER_BITS_COLOR); - mask &= ~BUFFER_BITS_COLOR; - } - - if (devinfo->ver >= 6 && (mask & BUFFER_BITS_DEPTH_STENCIL)) { - brw_blorp_clear_depth_stencil(brw, fb, mask, partial_clear); - debug_mask("blorp depth/stencil", mask & BUFFER_BITS_DEPTH_STENCIL); - mask &= ~BUFFER_BITS_DEPTH_STENCIL; - } - - GLbitfield tri_mask = mask & (BUFFER_BIT_STENCIL | - BUFFER_BIT_DEPTH); - - if (tri_mask) { - debug_mask("tri", tri_mask); - mask &= ~tri_mask; - _mesa_meta_glsl_Clear(&brw->ctx, tri_mask); - } - - /* Any strange buffers get passed off to swrast. The only thing that - * should be left at this point is the accumulation buffer. - */ - assert((mask & ~BUFFER_BIT_ACCUM) == 0); - if (mask) { - debug_mask("swrast", mask); - _swrast_Clear(ctx, mask); - } -} - - -void -brw_init_clear_functions(struct dd_function_table *functions) -{ - functions->Clear = brw_clear; -} diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c deleted file mode 100644 index fa97f9a..0000000 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ /dev/null @@ -1,210 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "main/macros.h" -#include "main/enums.h" - -#include "brw_batch.h" - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_util.h" -#include "brw_state.h" -#include "compiler/brw_eu.h" - -#include "util/ralloc.h" - -static void -compile_clip_prog(struct brw_context *brw, struct brw_clip_prog_key *key) -{ - const unsigned *program; - void *mem_ctx; - unsigned program_size; - - mem_ctx = ralloc_context(NULL); - - struct brw_clip_prog_data prog_data; - program = brw_compile_clip(brw->screen->compiler, mem_ctx, key, &prog_data, - &brw->vue_map_geom_out, &program_size); - - brw_upload_cache(&brw->cache, - BRW_CACHE_CLIP_PROG, - key, sizeof(*key), - program, program_size, - &prog_data, sizeof(prog_data), - &brw->clip.prog_offset, &brw->clip.prog_data); - ralloc_free(mem_ctx); -} - -/* Calculate interpolants for triangle and line rasterization. - */ -void -brw_upload_clip_prog(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - struct brw_clip_prog_key key; - - if (!brw_state_dirty(brw, - _NEW_BUFFERS | - _NEW_LIGHT | - _NEW_POLYGON | - _NEW_TRANSFORM, - BRW_NEW_BLORP | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_REDUCED_PRIMITIVE | - BRW_NEW_VUE_MAP_GEOM_OUT)) - return; - - memset(&key, 0, sizeof(key)); - - /* Populate the key: - */ - - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - if (wm_prog_data) { - key.contains_flat_varying = wm_prog_data->contains_flat_varying; - key.contains_noperspective_varying = - wm_prog_data->contains_noperspective_varying; - - STATIC_ASSERT(sizeof(key.interp_mode) == - sizeof(wm_prog_data->interp_mode)); - memcpy(key.interp_mode, wm_prog_data->interp_mode, - sizeof(key.interp_mode)); - } - - /* BRW_NEW_REDUCED_PRIMITIVE */ - key.primitive = brw->reduced_primitive; - /* BRW_NEW_VUE_MAP_GEOM_OUT */ - key.attrs = brw->vue_map_geom_out.slots_valid; - - /* _NEW_LIGHT */ - key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); - /* _NEW_TRANSFORM (also part of VUE map)*/ - if (ctx->Transform.ClipPlanesEnabled) - key.nr_userclip = util_logbase2(ctx->Transform.ClipPlanesEnabled) + 1; - - if (devinfo->ver == 5) - key.clip_mode = BRW_CLIP_MODE_KERNEL_CLIP; - else - key.clip_mode = BRW_CLIP_MODE_NORMAL; - - /* _NEW_POLYGON */ - if (key.primitive == GL_TRIANGLES) { - if (ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) - key.clip_mode = BRW_CLIP_MODE_REJECT_ALL; - else { - GLuint fill_front = BRW_CLIP_FILL_MODE_CULL; - GLuint fill_back = BRW_CLIP_FILL_MODE_CULL; - GLuint offset_front = 0; - GLuint offset_back = 0; - - if (!ctx->Polygon.CullFlag || - ctx->Polygon.CullFaceMode != GL_FRONT) { - switch (ctx->Polygon.FrontMode) { - case GL_FILL: - fill_front = BRW_CLIP_FILL_MODE_FILL; - offset_front = 0; - break; - case GL_LINE: - fill_front = BRW_CLIP_FILL_MODE_LINE; - offset_front = ctx->Polygon.OffsetLine; - break; - case GL_POINT: - fill_front = BRW_CLIP_FILL_MODE_POINT; - offset_front = ctx->Polygon.OffsetPoint; - break; - } - } - - if (!ctx->Polygon.CullFlag || - ctx->Polygon.CullFaceMode != GL_BACK) { - switch (ctx->Polygon.BackMode) { - case GL_FILL: - fill_back = BRW_CLIP_FILL_MODE_FILL; - offset_back = 0; - break; - case GL_LINE: - fill_back = BRW_CLIP_FILL_MODE_LINE; - offset_back = ctx->Polygon.OffsetLine; - break; - case GL_POINT: - 
fill_back = BRW_CLIP_FILL_MODE_POINT; - offset_back = ctx->Polygon.OffsetPoint; - break; - } - } - - if (ctx->Polygon.BackMode != GL_FILL || - ctx->Polygon.FrontMode != GL_FILL) { - key.do_unfilled = 1; - - /* Most cases the fixed function units will handle. Cases where - * one or more polygon faces are unfilled will require help: - */ - key.clip_mode = BRW_CLIP_MODE_CLIP_NON_REJECTED; - - if (offset_back || offset_front) { - /* _NEW_POLYGON, _NEW_BUFFERS */ - key.offset_units = ctx->Polygon.OffsetUnits * ctx->DrawBuffer->_MRD * 2; - key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD; - key.offset_clamp = ctx->Polygon.OffsetClamp * ctx->DrawBuffer->_MRD; - } - - if (!brw->polygon_front_bit) { - key.fill_ccw = fill_front; - key.fill_cw = fill_back; - key.offset_ccw = offset_front; - key.offset_cw = offset_back; - if (ctx->Light.Model.TwoSide && - key.fill_cw != BRW_CLIP_FILL_MODE_CULL) - key.copy_bfc_cw = 1; - } else { - key.fill_cw = fill_front; - key.fill_ccw = fill_back; - key.offset_cw = offset_front; - key.offset_ccw = offset_back; - if (ctx->Light.Model.TwoSide && - key.fill_ccw != BRW_CLIP_FILL_MODE_CULL) - key.copy_bfc_ccw = 1; - } - } - } - } - - if (!brw_search_cache(&brw->cache, BRW_CACHE_CLIP_PROG, &key, sizeof(key), - &brw->clip.prog_offset, &brw->clip.prog_data, true)) { - compile_clip_prog( brw, &key ); - } -} diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c deleted file mode 100644 index 0b0ecbd..0000000 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include - -#include "main/condrender.h" -#include "main/mtypes.h" -#include "main/state.h" -#include "brw_context.h" -#include "brw_draw.h" -#include "brw_state.h" -#include "brw_batch.h" -#include "brw_buffer_objects.h" -#include "brw_defines.h" - - -static void -brw_dispatch_compute_common(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - bool fail_next; - - if (!_mesa_check_conditional_render(ctx)) - return; - - if (ctx->NewState) - _mesa_update_state(ctx); - - brw_validate_textures(brw); - - brw_predraw_resolve_inputs(brw, false, NULL); - - /* Flush the batch if the batch/state buffers are nearly full. We can - * grow them if needed, but this is not free, so we'd like to avoid it. 
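 * (Roadmap for the code below, added for clarity: the batch is
 *  snapshotted with brw_batch_save_state(); if the dispatched walker
 *  then fails the aperture check, the batch is rolled back, flushed
 *  and retried exactly once, and the fail_next flag turns a second
 *  failure into a WARN_ONCE rather than an endless retry loop.)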
- */ - brw_batch_require_space(brw, 600); - brw_require_statebuffer_space(brw, 2500); - brw_batch_save_state(brw); - fail_next = brw_batch_saved_state_is_empty(brw); - - retry: - brw->batch.no_wrap = true; - brw_upload_compute_state(brw); - - brw->vtbl.emit_compute_walker(brw); - - brw->batch.no_wrap = false; - - if (!brw_batch_has_aperture_space(brw, 0)) { - if (!fail_next) { - brw_batch_reset_to_saved(brw); - brw_batch_flush(brw); - fail_next = true; - goto retry; - } else { - int ret = brw_batch_flush(brw); - WARN_ONCE(ret == -ENOSPC, - "i965: Single compute shader dispatch " - "exceeded available aperture space\n"); - } - } - - /* Now that we know we haven't run out of aperture space, we can safely - * reset the dirty bits. - */ - brw_compute_state_finished(brw); - - if (brw->always_flush_batch) - brw_batch_flush(brw); - - brw_program_cache_check_size(brw); - - /* Note: since compute shaders can't write to framebuffers, there's no need - * to call brw_postdraw_set_buffers_need_resolve(). - */ -} - -static void -brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) { - struct brw_context *brw = brw_context(ctx); - - brw->compute.num_work_groups_bo = NULL; - brw->compute.num_work_groups = num_groups; - brw->compute.group_size = NULL; - ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS; - - brw_dispatch_compute_common(ctx); -} - -static void -brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect) -{ - struct brw_context *brw = brw_context(ctx); - static const GLuint indirect_group_counts[3] = { 0, 0, 0 }; - struct gl_buffer_object *indirect_buffer = ctx->DispatchIndirectBuffer; - struct brw_bo *bo = - brw_bufferobj_buffer(brw, - brw_buffer_object(indirect_buffer), - indirect, 3 * sizeof(GLuint), false); - - brw->compute.num_work_groups_bo = bo; - brw->compute.num_work_groups_offset = indirect; - brw->compute.num_work_groups = indirect_group_counts; - brw->compute.group_size = NULL; - ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS; - - brw_dispatch_compute_common(ctx); -} - -static void -brw_dispatch_compute_group_size(struct gl_context *ctx, - const GLuint *num_groups, - const GLuint *group_size) -{ - struct brw_context *brw = brw_context(ctx); - - brw->compute.num_work_groups_bo = NULL; - brw->compute.num_work_groups = num_groups; - brw->compute.group_size = group_size; - ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS; - - brw_dispatch_compute_common(ctx); -} - -void -brw_init_compute_functions(struct dd_function_table *functions) -{ - functions->DispatchCompute = brw_dispatch_compute; - functions->DispatchComputeIndirect = brw_dispatch_compute_indirect; - functions->DispatchComputeGroupSize = brw_dispatch_compute_group_size; -} diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c b/src/mesa/drivers/dri/i965/brw_conditional_render.c deleted file mode 100644 index 2736624..0000000 --- a/src/mesa/drivers/dri/i965/brw_conditional_render.c +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * 
paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Neil Roberts - */ - -/** @file brw_conditional_render.c - * - * Support for conditional rendering based on query objects - * (GL_NV_conditional_render, GL_ARB_conditional_render_inverted) on Gfx7+. - */ - -#include "main/condrender.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_batch.h" - -static void -set_predicate_enable(struct brw_context *brw, - bool value) -{ - if (value) - brw->predicate.state = BRW_PREDICATE_STATE_RENDER; - else - brw->predicate.state = BRW_PREDICATE_STATE_DONT_RENDER; -} - -static void -set_predicate_for_overflow_query(struct brw_context *brw, - struct brw_query_object *query, - int stream_start, int count) -{ - if (!can_do_mi_math_and_lrr(brw->screen)) { - brw->predicate.state = BRW_PREDICATE_STATE_STALL_FOR_QUERY; - return; - } - - brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT; - - /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM - * command when loading the values into the predicate source registers for - * conditional rendering. - */ - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE); - - hsw_overflow_result_to_gpr0(brw, query, count); - brw_load_register_reg64(brw, MI_PREDICATE_SRC0, HSW_CS_GPR(0)); - brw_load_register_imm64(brw, MI_PREDICATE_SRC1, 0ull); -} - -static void -set_predicate_for_occlusion_query(struct brw_context *brw, - struct brw_query_object *query) -{ - if (!brw->predicate.supported) { - brw->predicate.state = BRW_PREDICATE_STATE_STALL_FOR_QUERY; - return; - } - - brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT; - - /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM - * command when loading the values into the predicate source registers for - * conditional rendering. 
- */ - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE); - - brw_load_register_mem64(brw, MI_PREDICATE_SRC0, query->bo, 0 /* offset */); - brw_load_register_mem64(brw, MI_PREDICATE_SRC1, query->bo, 8 /* offset */); -} - -static void -set_predicate_for_result(struct brw_context *brw, - struct brw_query_object *query, - bool inverted) -{ - int load_op; - - assert(query->bo != NULL); - - switch (query->Base.Target) { - case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: - set_predicate_for_overflow_query(brw, query, 0, 1); - break; - case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: - set_predicate_for_overflow_query(brw, query, 0, MAX_VERTEX_STREAMS); - break; - default: - set_predicate_for_occlusion_query(brw, query); - } - - if (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) { - if (inverted) - load_op = MI_PREDICATE_LOADOP_LOAD; - else - load_op = MI_PREDICATE_LOADOP_LOADINV; - - BEGIN_BATCH(1); - OUT_BATCH(GFX7_MI_PREDICATE | - load_op | - MI_PREDICATE_COMBINEOP_SET | - MI_PREDICATE_COMPAREOP_SRCS_EQUAL); - ADVANCE_BATCH(); - } -} - -static void -brw_begin_conditional_render(struct gl_context *ctx, - struct gl_query_object *q, - GLenum mode) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_query_object *query = (struct brw_query_object *) q; - bool inverted; - - switch (mode) { - case GL_QUERY_WAIT: - case GL_QUERY_NO_WAIT: - case GL_QUERY_BY_REGION_WAIT: - case GL_QUERY_BY_REGION_NO_WAIT: - inverted = false; - break; - case GL_QUERY_WAIT_INVERTED: - case GL_QUERY_NO_WAIT_INVERTED: - case GL_QUERY_BY_REGION_WAIT_INVERTED: - case GL_QUERY_BY_REGION_NO_WAIT_INVERTED: - inverted = true; - break; - default: - unreachable("Unexpected conditional render mode"); - } - - /* If there are already samples from a BLT operation or if the query object - * is ready then we can avoid looking at the values in the buffer and just - * decide whether to draw using the CPU without stalling. - */ - if (query->Base.Result || query->Base.Ready) - set_predicate_enable(brw, (query->Base.Result != 0) ^ inverted); - else - set_predicate_for_result(brw, query, inverted); -} - -static void -brw_end_conditional_render(struct gl_context *ctx, - struct gl_query_object *q) -{ - struct brw_context *brw = brw_context(ctx); - - /* When there is no longer a conditional render in progress it should - * always render. - */ - brw->predicate.state = BRW_PREDICATE_STATE_RENDER; -} - -void -brw_init_conditional_render_functions(struct dd_function_table *functions) -{ - functions->BeginConditionalRender = brw_begin_conditional_render; - functions->EndConditionalRender = brw_end_conditional_render; -} - -bool -brw_check_conditional_render(struct brw_context *brw) -{ - if (brw->predicate.state == BRW_PREDICATE_STATE_STALL_FOR_QUERY) { - perf_debug("Conditional rendering is implemented in software and may " - "stall.\n"); - return _mesa_check_conditional_render(&brw->ctx); - } - - return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER; -} diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c deleted file mode 100644 index af8b349..0000000 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ /dev/null @@ -1,1975 +0,0 @@ -/* - Copyright 2003 VMware, Inc. - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. 
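[brw_begin_conditional_render above collapses the eight GL conditional-render modes into a single inverted flag and, when the query result is already known on the CPU, skips GPU predication entirely: render exactly when (Result != 0) XOR inverted. A self-contained model of that fast path:]

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* CPU-side fast path: render iff "samples passed" xor "inverted mode". */
static bool
render_enabled(uint64_t result, bool inverted)
{
   return (result != 0) ^ inverted;
}

int
main(void)
{
   assert( render_enabled(42, false));  /* GL_QUERY_WAIT, samples passed */
   assert(!render_enabled(0,  false));  /* GL_QUERY_WAIT, nothing passed */
   assert(!render_enabled(42, true));   /* *_INVERTED variants flip it */
   assert( render_enabled(0,  true));
   return 0;
}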
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "compiler/nir/nir.h" -#include "main/api_exec.h" -#include "main/context.h" -#include "main/fbobject.h" -#include "main/extensions.h" -#include "main/glthread.h" -#include "main/macros.h" -#include "main/points.h" -#include "main/version.h" -#include "main/vtxfmt.h" -#include "main/texobj.h" -#include "main/framebuffer.h" -#include "main/stencil.h" -#include "main/state.h" -#include "main/spirv_extensions.h" -#include "main/externalobjects.h" - -#include "vbo/vbo.h" - -#include "drivers/common/driverfuncs.h" -#include "drivers/common/meta.h" -#include "utils.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_blorp.h" -#include "brw_draw.h" -#include "brw_state.h" - -#include "brw_batch.h" -#include "brw_buffer_objects.h" -#include "brw_buffers.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" -#include "brw_pixel.h" -#include "brw_image.h" -#include "brw_tex.h" -#include "brw_tex_obj.h" - -#include "swrast_setup/swrast_setup.h" -#include "tnl/tnl.h" -#include "tnl/t_pipeline.h" -#include "util/ralloc.h" -#include "util/debug.h" -#include "util/disk_cache.h" -#include "util/u_memory.h" -#include "isl/isl.h" - -#include "common/intel_defines.h" -#include "common/intel_uuid.h" - -#include "compiler/spirv/nir_spirv.h" -/*************************************** - * Mesa's Driver Functions - ***************************************/ - -const char *const brw_vendor_string = "Intel Open Source Technology Center"; - -const char * -brw_get_renderer_string(const struct brw_screen *screen) -{ - static char buf[128]; - const char *name = screen->devinfo.name; - - if (!name) - name = "Intel Unknown"; - - snprintf(buf, sizeof(buf), "Mesa DRI %s", name); - - return buf; -} - -static const GLubyte * -brw_get_string(struct gl_context * ctx, GLenum name) -{ - const struct brw_context *const brw = brw_context(ctx); - - switch (name) { - case GL_VENDOR: - return (GLubyte *) brw_vendor_string; - - case GL_RENDERER: - return - (GLubyte *) brw_get_renderer_string(brw->screen); - - default: - return NULL; - } -} - -static void -brw_set_background_context(struct gl_context *ctx, - UNUSED struct util_queue_monitoring *queue_info) -{ - struct brw_context *brw = brw_context(ctx); - __DRIcontext *driContext = brw->driContext; - __DRIscreen *driScreen = driContext->driScreenPriv; - 
const __DRIbackgroundCallableExtension *backgroundCallable = - driScreen->dri2.backgroundCallable; - - /* Note: Mesa will only call this function if we've called - * _mesa_enable_multithreading(). We only do that if the loader exposed - * the __DRI_BACKGROUND_CALLABLE extension. So we know that - * backgroundCallable is not NULL. - */ - backgroundCallable->setBackgroundContext(driContext->loaderPrivate); -} - -static struct gl_memory_object * -brw_new_memoryobj(struct gl_context *ctx, GLuint name) -{ - struct brw_memory_object *memory_object = CALLOC_STRUCT(brw_memory_object); - if (!memory_object) - return NULL; - - _mesa_initialize_memory_object(ctx, &memory_object->Base, name); - return &memory_object->Base; -} - -static void -brw_delete_memoryobj(struct gl_context *ctx, struct gl_memory_object *memObj) -{ - struct brw_memory_object *memory_object = brw_memory_object(memObj); - brw_bo_unreference(memory_object->bo); - _mesa_delete_memory_object(ctx, memObj); -} - -static void -brw_import_memoryobj_fd(struct gl_context *ctx, - struct gl_memory_object *obj, - GLuint64 size, - int fd) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_memory_object *memory_object = brw_memory_object(obj); - - memory_object->bo = brw_bo_gem_create_from_prime(brw->bufmgr, fd); - brw_bo_reference(memory_object->bo); - assert(memory_object->bo->size >= size); - close(fd); -} - -static void -brw_viewport(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - __DRIcontext *driContext = brw->driContext; - - if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) { - if (driContext->driDrawablePriv) - dri2InvalidateDrawable(driContext->driDrawablePriv); - if (driContext->driReadablePriv) - dri2InvalidateDrawable(driContext->driReadablePriv); - } -} - -static void -brw_update_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) -{ - struct brw_context *brw = brw_context(ctx); - - /* Quantize the derived default number of samples - */ - fb->DefaultGeometry._NumSamples = - brw_quantize_num_samples(brw->screen, fb->DefaultGeometry.NumSamples); -} - -static void -brw_update_state(struct gl_context * ctx) -{ - GLuint new_state = ctx->NewState; - struct brw_context *brw = brw_context(ctx); - - if (ctx->swrast_context) - _swrast_InvalidateState(ctx, new_state); - - brw->NewGLState |= new_state; - - if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT)) - _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); - - if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) { - brw->stencil_enabled = _mesa_stencil_is_enabled(ctx); - brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx); - brw->stencil_write_enabled = - _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided); - } - - if (new_state & _NEW_POLYGON) - brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx); - - if (new_state & _NEW_BUFFERS) { - brw_update_framebuffer(ctx, ctx->DrawBuffer); - if (ctx->DrawBuffer != ctx->ReadBuffer) - brw_update_framebuffer(ctx, ctx->ReadBuffer); - } -} - -#define flushFront(screen) ((screen)->image.loader ? 
(screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer) - -static void -brw_flush_front(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - __DRIcontext *driContext = brw->driContext; - __DRIdrawable *driDrawable = driContext->driDrawablePriv; - __DRIscreen *const dri_screen = brw->screen->driScrnPriv; - - if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) { - if (flushFront(dri_screen) && driDrawable && - driDrawable->loaderPrivate) { - - /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT. - * - * This potentially resolves both front and back buffer. It - * is unnecessary to resolve the back, but harms nothing except - * performance. And no one cares about front-buffer render - * performance. - */ - brw_resolve_for_dri2_flush(brw, driDrawable); - brw_batch_flush(brw); - - flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate); - - /* We set the dirty bit in brw_prepare_render() if we're - * front buffer rendering once we get there. - */ - brw->front_buffer_dirty = false; - } - } -} - -static void -brw_display_shared_buffer(struct brw_context *brw) -{ - __DRIcontext *dri_context = brw->driContext; - __DRIdrawable *dri_drawable = dri_context->driDrawablePriv; - __DRIscreen *dri_screen = brw->screen->driScrnPriv; - int fence_fd = -1; - - if (!brw->is_shared_buffer_bound) - return; - - if (!brw->is_shared_buffer_dirty) - return; - - if (brw->screen->has_exec_fence) { - /* This function is always called during a flush operation, so there is - * no need to flush again here. But we want to provide a fence_fd to the - * loader, and a redundant flush is the easiest way to acquire one. - */ - if (brw_batch_flush_fence(brw, -1, &fence_fd)) - return; - } - - dri_screen->mutableRenderBuffer.loader - ->displaySharedBuffer(dri_drawable, fence_fd, - dri_drawable->loaderPrivate); - brw->is_shared_buffer_dirty = false; -} - -static void -brw_glFlush(struct gl_context *ctx, unsigned gallium_flush_flags) -{ - struct brw_context *brw = brw_context(ctx); - - brw_batch_flush(brw); - brw_flush_front(ctx); - brw_display_shared_buffer(brw); - brw->need_flush_throttle = true; -} - -static void -brw_glEnable(struct gl_context *ctx, GLenum cap, GLboolean state) -{ - struct brw_context *brw = brw_context(ctx); - - switch (cap) { - case GL_BLACKHOLE_RENDER_INTEL: - brw->frontend_noop = state; - brw_batch_flush(brw); - brw_batch_maybe_noop(brw); - /* Because we started previous batches with a potential - * MI_BATCH_BUFFER_END if NOOP was enabled, that means that anything - * that was ever emitted after that never made it to the HW. So when the - * blackhole state changes from NOOP->!NOOP reupload the entire state. 
- */ - if (!brw->frontend_noop) { - brw->NewGLState = ~0u; - brw->ctx.NewDriverState = ~0ull; - } - break; - default: - break; - } -} - -static void -brw_finish(struct gl_context * ctx) -{ - struct brw_context *brw = brw_context(ctx); - - brw_glFlush(ctx, 0); - - if (brw->batch.last_bo) - brw_bo_wait_rendering(brw->batch.last_bo); -} - -static void -brw_get_device_uuid(struct gl_context *ctx, char *uuid) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_screen *screen = brw->screen; - - assert(GL_UUID_SIZE_EXT >= PIPE_UUID_SIZE); - memset(uuid, 0, GL_UUID_SIZE_EXT); - intel_uuid_compute_device_id((uint8_t *)uuid, &screen->isl_dev, PIPE_UUID_SIZE); -} - - -static void -brw_get_driver_uuid(struct gl_context *ctx, char *uuid) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_screen *screen = brw->screen; - - assert(GL_UUID_SIZE_EXT >= PIPE_UUID_SIZE); - memset(uuid, 0, GL_UUID_SIZE_EXT); - intel_uuid_compute_driver_id((uint8_t *)uuid, &screen->devinfo, PIPE_UUID_SIZE); -} - -static void -brw_init_driver_functions(struct brw_context *brw, - struct dd_function_table *functions) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - _mesa_init_driver_functions(functions); - - /* GLX uses DRI2 invalidate events to handle window resizing. - * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib), - * which doesn't provide a mechanism for snooping the event queues. - * - * So EGL still relies on viewport hacks to handle window resizing. - * This should go away with DRI3000. - */ - if (!brw->driContext->driScreenPriv->dri2.useInvalidate) - functions->Viewport = brw_viewport; - - functions->Enable = brw_glEnable; - functions->Flush = brw_glFlush; - functions->Finish = brw_finish; - functions->GetString = brw_get_string; - functions->UpdateState = brw_update_state; - - brw_init_draw_functions(functions); - brw_init_texture_functions(functions); - brw_init_texture_image_functions(functions); - brw_init_texture_copy_image_functions(functions); - brw_init_copy_image_functions(functions); - brw_init_clear_functions(functions); - brw_init_buffer_functions(functions); - brw_init_pixel_functions(functions); - brw_init_buffer_object_functions(functions); - brw_init_syncobj_functions(functions); - brw_init_object_purgeable_functions(functions); - - brw_init_frag_prog_functions(functions); - brw_init_common_queryobj_functions(functions); - if (devinfo->verx10 >= 75) - hsw_init_queryobj_functions(functions); - else if (devinfo->ver >= 6) - gfx6_init_queryobj_functions(functions); - else - gfx4_init_queryobj_functions(functions); - brw_init_compute_functions(functions); - brw_init_conditional_render_functions(functions); - - functions->GenerateMipmap = brw_generate_mipmap; - - functions->QueryInternalFormat = brw_query_internal_format; - - functions->NewTransformFeedback = brw_new_transform_feedback; - functions->DeleteTransformFeedback = brw_delete_transform_feedback; - if (can_do_mi_math_and_lrr(brw->screen)) { - functions->BeginTransformFeedback = hsw_begin_transform_feedback; - functions->EndTransformFeedback = hsw_end_transform_feedback; - functions->PauseTransformFeedback = hsw_pause_transform_feedback; - functions->ResumeTransformFeedback = hsw_resume_transform_feedback; - } else if (devinfo->ver >= 7) { - functions->BeginTransformFeedback = gfx7_begin_transform_feedback; - functions->EndTransformFeedback = gfx7_end_transform_feedback; - functions->PauseTransformFeedback = gfx7_pause_transform_feedback; - functions->ResumeTransformFeedback = 
gfx7_resume_transform_feedback; - functions->GetTransformFeedbackVertexCount = - brw_get_transform_feedback_vertex_count; - } else { - functions->BeginTransformFeedback = brw_begin_transform_feedback; - functions->EndTransformFeedback = brw_end_transform_feedback; - functions->PauseTransformFeedback = brw_pause_transform_feedback; - functions->ResumeTransformFeedback = brw_resume_transform_feedback; - functions->GetTransformFeedbackVertexCount = - brw_get_transform_feedback_vertex_count; - } - - if (devinfo->ver >= 6) - functions->GetSamplePosition = gfx6_get_sample_position; - - /* GL_ARB_get_program_binary */ - brw_program_binary_init(brw->screen->deviceID); - functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1; - functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary; - functions->ProgramBinaryDeserializeDriverBlob = - brw_deserialize_program_binary; - - if (brw->screen->disk_cache) { - functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir; - } - - functions->SetBackgroundContext = brw_set_background_context; - - functions->NewMemoryObject = brw_new_memoryobj; - functions->DeleteMemoryObject = brw_delete_memoryobj; - functions->ImportMemoryObjectFd = brw_import_memoryobj_fd; - functions->GetDeviceUuid = brw_get_device_uuid; - functions->GetDriverUuid = brw_get_driver_uuid; -} - -static void -brw_initialize_spirv_supported_capabilities(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - - /* The following SPIR-V capabilities are only supported on gfx7+. In theory - * you should enable the extension only on gfx7+, but just in case let's - * assert it. - */ - assert(devinfo->ver >= 7); - - ctx->Const.SpirVCapabilities.atomic_storage = devinfo->ver >= 7; - ctx->Const.SpirVCapabilities.draw_parameters = true; - ctx->Const.SpirVCapabilities.float64 = devinfo->ver >= 8; - ctx->Const.SpirVCapabilities.geometry_streams = devinfo->ver >= 7; - ctx->Const.SpirVCapabilities.image_write_without_format = true; - ctx->Const.SpirVCapabilities.int64 = devinfo->ver >= 8; - ctx->Const.SpirVCapabilities.tessellation = true; - ctx->Const.SpirVCapabilities.transform_feedback = devinfo->ver >= 7; - ctx->Const.SpirVCapabilities.variable_pointers = true; - ctx->Const.SpirVCapabilities.integer_functions2 = devinfo->ver >= 8; -} - -static void -brw_initialize_context_constants(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - const struct brw_compiler *compiler = brw->screen->compiler; - - const bool stage_exists[MESA_SHADER_STAGES] = { - [MESA_SHADER_VERTEX] = true, - [MESA_SHADER_TESS_CTRL] = devinfo->ver >= 7, - [MESA_SHADER_TESS_EVAL] = devinfo->ver >= 7, - [MESA_SHADER_GEOMETRY] = devinfo->ver >= 6, - [MESA_SHADER_FRAGMENT] = true, - [MESA_SHADER_COMPUTE] = - (_mesa_is_desktop_gl(ctx) && - ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) || - (ctx->API == API_OPENGLES2 && - ctx->Const.MaxComputeWorkGroupSize[0] >= 128), - }; - - unsigned num_stages = 0; - for (int i = 0; i < MESA_SHADER_STAGES; i++) { - if (stage_exists[i]) - num_stages++; - } - - unsigned max_samplers = - devinfo->verx10 >= 75 ? 
BRW_MAX_TEX_UNIT : 16; - - ctx->Const.MaxDualSourceDrawBuffers = 1; - ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; - ctx->Const.MaxCombinedShaderOutputResources = - MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS; - - /* The timestamp register we can read for glGetTimestamp() is - * sometimes only 32 bits, before scaling to nanoseconds (depending - * on the kernel). - * - * Once scaled to nanoseconds the timestamp would roll over at a - * non-power-of-two, so an application couldn't use - * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we - * report 36 bits and truncate at that (rolling over 5 times as - * often as the HW counter), and when the 32-bit counter rolls - * over, it happens to also be at a rollover in the reported value - * from near (1<<36) to 0. - * - * The low 32 bits roll over in ~343 seconds. Our 36-bit result - * rolls over every ~69 seconds. - */ - ctx->Const.QueryCounterBits.Timestamp = 36; - - ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ - ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS; - if (devinfo->ver >= 7) { - ctx->Const.MaxRenderbufferSize = 16384; - ctx->Const.MaxTextureSize = 16384; - ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */ - } else { - ctx->Const.MaxRenderbufferSize = 8192; - ctx->Const.MaxTextureSize = 8192; - ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */ - } - ctx->Const.Max3DTextureLevels = 12; /* 2048 */ - ctx->Const.MaxArrayTextureLayers = devinfo->ver >= 7 ? 2048 : 512; - ctx->Const.MaxTextureMbytes = 1536; - ctx->Const.MaxTextureRectSize = devinfo->ver >= 7 ? 16384 : 8192; - ctx->Const.MaxTextureMaxAnisotropy = 16.0; - ctx->Const.MaxTextureLodBias = 15.0; - ctx->Const.StripTextureBorder = true; - if (devinfo->ver >= 7) { - ctx->Const.MaxProgramTextureGatherComponents = 4; - ctx->Const.MinProgramTextureGatherOffset = -32; - ctx->Const.MaxProgramTextureGatherOffset = 31; - } else if (devinfo->ver == 6) { - ctx->Const.MaxProgramTextureGatherComponents = 1; - ctx->Const.MinProgramTextureGatherOffset = -8; - ctx->Const.MaxProgramTextureGatherOffset = 7; - } - - ctx->Const.MaxUniformBlockSize = 65536; - - for (int i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_program_constants *prog = &ctx->Const.Program[i]; - - if (!stage_exists[i]) - continue; - - prog->MaxTextureImageUnits = max_samplers; - - prog->MaxUniformBlocks = BRW_MAX_UBO; - prog->MaxCombinedUniformComponents = - prog->MaxUniformComponents + - ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks; - - prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS; - prog->MaxAtomicBuffers = BRW_MAX_ABO; - prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0; - prog->MaxShaderStorageBlocks = BRW_MAX_SSBO; - } - - ctx->Const.MaxTextureUnits = - MIN2(ctx->Const.MaxTextureCoordUnits, - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits); - - ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO; - ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO; - ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO; - ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO; - ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO; - ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers; - ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES; - - - /* Hardware only supports a limited number of transform feedback buffers. - * So we need to override the Mesa default (which is based only on software - * limits). 
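[The 36-bit choice explained in the timestamp comment above is easy to sanity-check. Assuming the 80 ns timestamp tick implied by the ~343 s figure, the raw 32-bit register wraps every 2^32 ticks, while the nanosecond-scaled value truncated to 36 bits wraps every 2^36 ns, roughly five times as often:]

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
   /* 80 ns per tick is an assumption taken from the ~343 s figure
    * in the comment above (2^32 * 80 ns ~ 343.6 s). */
   double hw_wrap_s = (double)(1ull << 32) * 80e-9;       /* ~343.6 s */
   double reported_wrap_s = (double)(1ull << 36) * 1e-9;  /* ~68.7 s */

   printf("hw: %.1f s, reported: %.1f s, ratio: %.1f\n",
          hw_wrap_s, reported_wrap_s, hw_wrap_s / reported_wrap_s);

   /* Truncating the scaled value to 36 bits is just a mask: */
   uint64_t ns = 123456789123ull;
   uint64_t reported = ns & ((1ull << 36) - 1);
   (void)reported;
   return 0;
}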
- */ - ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS; - - /* On Gfx6, in the worst case, we use up one binding table entry per - * transform feedback component (see comments above the definition of - * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value - * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to - * BRW_MAX_SOL_BINDINGS. - * - * In "separate components" mode, we need to divide this value by - * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries - * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS. - */ - ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS; - ctx->Const.MaxTransformFeedbackSeparateComponents = - BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS; - - ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = - !can_do_mi_math_and_lrr(brw->screen); - - int max_samples; - const int *msaa_modes = brw_supported_msaa_modes(brw->screen); - const int clamp_max_samples = - driQueryOptioni(&brw->screen->optionCache, "clamp_max_samples"); - - if (clamp_max_samples < 0) { - max_samples = msaa_modes[0]; - } else { - /* Select the largest supported MSAA mode that does not exceed - * clamp_max_samples. - */ - max_samples = 0; - for (int i = 0; msaa_modes[i] != 0; ++i) { - if (msaa_modes[i] <= clamp_max_samples) { - max_samples = msaa_modes[i]; - break; - } - } - } - - ctx->Const.MaxSamples = max_samples; - ctx->Const.MaxColorTextureSamples = max_samples; - ctx->Const.MaxDepthTextureSamples = max_samples; - ctx->Const.MaxIntegerSamples = max_samples; - ctx->Const.MaxImageSamples = 0; - - ctx->Const.MinLineWidth = 1.0; - ctx->Const.MinLineWidthAA = 1.0; - if (devinfo->ver >= 6) { - ctx->Const.MaxLineWidth = 7.375; - ctx->Const.MaxLineWidthAA = 7.375; - ctx->Const.LineWidthGranularity = 0.125; - } else { - ctx->Const.MaxLineWidth = 7.0; - ctx->Const.MaxLineWidthAA = 7.0; - ctx->Const.LineWidthGranularity = 0.5; - } - - /* For non-antialiased lines, we have to round the line width to the - * nearest whole number. Make sure that we don't advertise a line - * width that, when rounded, will be beyond the actual hardware - * maximum. 
- */ - assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth); - - ctx->Const.MinPointSize = 1.0; - ctx->Const.MinPointSizeAA = 1.0; - ctx->Const.MaxPointSize = 255.0; - ctx->Const.MaxPointSizeAA = 255.0; - ctx->Const.PointSizeGranularity = 1.0; - - if (devinfo->verx10 >= 45) - ctx->Const.MaxClipPlanes = 8; - - ctx->Const.GLSLFragCoordIsSysVal = true; - ctx->Const.GLSLFrontFacingIsSysVal = true; - ctx->Const.GLSLTessLevelsAsInputs = true; - ctx->Const.PrimitiveRestartForPatches = true; - - ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams = - MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters, - ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams); - - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams = - MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters, - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams); - - /* Fragment shaders use real, 32-bit two's-complement integers for all - * integer types. - */ - ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31; - ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30; - ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0; - ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt; - - ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31; - ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30; - ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0; - ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; - ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; - - /* Gfx6 converts quads to polygons at the beginning of the 3D pipeline, - * but we're not sure how that's actually done for vertex order, - * which affects the provoking vertex decision. Always use the last vertex - * convention for quad primitives, which works as expected for now. 
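[The integer precision values set above (RangeMin = 31, RangeMax = 30, Precision = 0) follow from how glGetShaderPrecisionFormat encodes ranges as log2 of the magnitude: exact 32-bit two's complement holds -2^31 .. 2^31 - 1, and floor(log2(2^31 - 1)) is 30. A small check, assuming a GCC-style __builtin_clzll:]

#include <assert.h>
#include <stdint.h>

int
main(void)
{
   int64_t min = -((int64_t)1 << 31);     /* log2(|min|) == 31 -> RangeMin */
   int64_t max = ((int64_t)1 << 31) - 1;  /* floor(log2(max)) == 30 -> RangeMax */

   assert(63 - __builtin_clzll((uint64_t)-min) == 31);
   assert(63 - __builtin_clzll((uint64_t)max) == 30);
   /* Precision == 0: every integer in the range is represented exactly. */
   return 0;
}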
- */ - if (devinfo->ver >= 6) - ctx->Const.QuadsFollowProvokingVertexConvention = false; - - ctx->Const.NativeIntegers = true; - - /* Regarding the CMP instruction, the Ivybridge PRM says: - * - * "For each enabled channel 0b or 1b is assigned to the appropriate flag - * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord - * 0xFFFFFFFF) is assigned to dst." - * - * but PRMs for earlier generations say - * - * "In dword format, one GRF may store up to 8 results. When the register - * is used later as a vector of Booleans, as only LSB at each channel - * contains meaning [sic] data, software should make sure all higher bits - * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)." - * - * We select the representation of a true boolean uniform to be ~0, and fix - * the results of Gen <= 5 CMP instruction's with -(result & 1). - */ - ctx->Const.UniformBooleanTrue = ~0; - - /* From the gfx4 PRM, volume 4 page 127: - * - * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies - * the base address of the first element of the surface, computed in - * software by adding the surface base address to the byte offset of - * the element in the buffer." - * - * However, unaligned accesses are slower, so enforce buffer alignment. - * - * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional - * restriction: the start of the buffer needs to be 32B aligned. - */ - ctx->Const.UniformBufferOffsetAlignment = 32; - - /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so - * that we can safely have the CPU and GPU writing the same SSBO on - * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never - * writes, so there's no problem. For an SSBO, the GPU and the CPU can - * be updating disjoint regions of the buffer simultaneously and that will - * break if the regions overlap the same cacheline. - */ - ctx->Const.ShaderStorageBufferOffsetAlignment = 64; - ctx->Const.TextureBufferOffsetAlignment = 16; - ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024; - - if (devinfo->ver >= 6) { - ctx->Const.MaxVarying = 32; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = - compiler->scalar_stage[MESA_SHADER_GEOMETRY] ? 128 : 64; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128; - ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128; - ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128; - ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128; - ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128; - } - - /* We want the GLSL compiler to emit code that uses condition codes */ - for (int i = 0; i < MESA_SHADER_STAGES; i++) { - ctx->Const.ShaderCompilerOptions[i] = - brw->screen->compiler->glsl_compiler_options[i]; - } - - if (devinfo->ver >= 7) { - ctx->Const.MaxViewportWidth = 32768; - ctx->Const.MaxViewportHeight = 32768; - } - - /* ARB_viewport_array, OES_viewport_array */ - if (devinfo->ver >= 6) { - ctx->Const.MaxViewports = GFX6_NUM_VIEWPORTS; - ctx->Const.ViewportSubpixelBits = 8; - - /* Cast to float before negating because MaxViewportWidth is unsigned. 
- */ - ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth; - ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth; - } - - /* ARB_gpu_shader5 */ - if (devinfo->ver >= 7) - ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS); - - /* ARB_framebuffer_no_attachments */ - ctx->Const.MaxFramebufferWidth = 16384; - ctx->Const.MaxFramebufferHeight = 16384; - ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers; - ctx->Const.MaxFramebufferSamples = max_samples; - - /* OES_primitive_bounding_box */ - ctx->Const.NoPrimitiveBoundingBoxOutput = true; - - /* TODO: We should be able to use STD430 packing by default on all hardware - * but some piglit tests [1] currently fail on SNB when this is enabled. - * The problem is the messages we're using for doing uniform pulls - * in the vec4 back-end on SNB is the OWORD block load instruction, which - * takes its offset in units of OWORDS (16 bytes). On IVB+, we use the - * sampler which doesn't have these restrictions. - * - * In the scalar back-end, we use the sampler for dynamic uniform loads and - * pull an entire cache line at a time for constant offset loads both of - * which support almost any alignment. - * - * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test - */ - if (devinfo->ver >= 7) - ctx->Const.UseSTD430AsDefaultPacking = true; - - if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) - ctx->Const.AllowMappedBuffersDuringExecution = true; - - /* GL_ARB_get_program_binary */ - ctx->Const.NumProgramBinaryFormats = 1; -} - -static void -brw_initialize_cs_context_constants(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* Maximum number of scalar compute shader invocations that can be run in - * parallel in the same subslice assuming SIMD32 dispatch. - */ - const unsigned max_threads = devinfo->max_cs_workgroup_threads; - const uint32_t max_invocations = 32 * max_threads; - ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations; - ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations; - ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations; - ctx->Const.MaxComputeWorkGroupInvocations = max_invocations; - ctx->Const.MaxComputeSharedMemorySize = 64 * 1024; - - /* Constants used for ARB_compute_variable_group_size. */ - if (devinfo->ver >= 7) { - assert(max_invocations >= 512); - ctx->Const.MaxComputeVariableGroupSize[0] = max_invocations; - ctx->Const.MaxComputeVariableGroupSize[1] = max_invocations; - ctx->Const.MaxComputeVariableGroupSize[2] = max_invocations; - ctx->Const.MaxComputeVariableGroupInvocations = max_invocations; - } -} - -/** - * Process driconf (drirc) options, setting appropriate context flags. - * - * brw_init_extensions still pokes at optionCache directly, in order to - * avoid advertising various extensions. No flags are set, so it makes - * sense to continue doing that there. - */ -static void -brw_process_driconf_options(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - const driOptionCache *const options = &brw->screen->optionCache; - - if (INTEL_DEBUG(DEBUG_NO_HIZ)) { - brw->has_hiz = false; - /* On gfx6, you can only do separate stencil with HIZ. 
*/ - if (devinfo->ver == 6) - brw->has_separate_stencil = false; - } - - if (driQueryOptionb(options, "mesa_no_error")) - ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR; - - if (driQueryOptionb(options, "always_flush_batch")) { - fprintf(stderr, "flushing batchbuffer before/after each draw call\n"); - brw->always_flush_batch = true; - } - - if (driQueryOptionb(options, "always_flush_cache")) { - fprintf(stderr, "flushing GPU caches before/after each draw call\n"); - brw->always_flush_cache = true; - } - - if (driQueryOptionb(options, "disable_throttling")) { - fprintf(stderr, "disabling flush throttling\n"); - brw->disable_throttling = true; - } - - brw->precompile = driQueryOptionb(&brw->screen->optionCache, "shader_precompile"); - - if (driQueryOptionb(&brw->screen->optionCache, "precise_trig")) - brw->screen->compiler->precise_trig = true; - - ctx->Const.ForceGLSLExtensionsWarn = - driQueryOptionb(options, "force_glsl_extensions_warn"); - - ctx->Const.ForceGLSLVersion = - driQueryOptioni(options, "force_glsl_version"); - - ctx->Const.DisableGLSLLineContinuations = - driQueryOptionb(options, "disable_glsl_line_continuations"); - - ctx->Const.AllowGLSLExtensionDirectiveMidShader = - driQueryOptionb(options, "allow_glsl_extension_directive_midshader"); - - ctx->Const.AllowGLSLBuiltinVariableRedeclaration = - driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration"); - - ctx->Const.AllowHigherCompatVersion = - driQueryOptionb(options, "allow_higher_compat_version"); - - ctx->Const.ForceGLSLAbsSqrt = - driQueryOptionb(options, "force_glsl_abs_sqrt"); - - ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init") ? 1 : 0; - - brw->dual_color_blend_by_location = - driQueryOptionb(options, "dual_color_blend_by_location"); - - ctx->Const.AllowGLSLCrossStageInterpolationMismatch = - driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch"); - - char *vendor_str = driQueryOptionstr(options, "force_gl_vendor"); - /* not an empty string */ - if (*vendor_str) - ctx->Const.VendorOverride = vendor_str; - - ctx->Const.dri_config_options_sha1 = - ralloc_array(brw->mem_ctx, unsigned char, 20); - driComputeOptionsSha1(&brw->screen->optionCache, - ctx->Const.dri_config_options_sha1); -} - -GLboolean -brw_create_context(gl_api api, - const struct gl_config *mesaVis, - __DRIcontext *driContextPriv, - const struct __DriverContextConfig *ctx_config, - unsigned *dri_ctx_error, - void *sharedContextPrivate) -{ - struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate; - struct brw_screen *screen = driContextPriv->driScreenPriv->driverPrivate; - const struct intel_device_info *devinfo = &screen->devinfo; - struct dd_function_table functions; - - /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel - * provides us with context reset notifications. 
- */ - uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG | - __DRI_CTX_FLAG_FORWARD_COMPATIBLE | - __DRI_CTX_FLAG_NO_ERROR; - - if (screen->has_context_reset_notification) - allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS; - - if (ctx_config->flags & ~allowed_flags) { - *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG; - return false; - } - - if (ctx_config->attribute_mask & - ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY | - __DRIVER_CONTEXT_ATTRIB_PRIORITY)) { - *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE; - return false; - } - - bool notify_reset = - ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) && - ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION); - - struct brw_context *brw = align_calloc(sizeof(struct brw_context), 16); - if (!brw) { - fprintf(stderr, "%s: failed to alloc context\n", __func__); - *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; - return false; - } - brw->mem_ctx = ralloc_context(NULL); - brw->perf_ctx = intel_perf_new_context(brw->mem_ctx); - - driContextPriv->driverPrivate = brw; - brw->driContext = driContextPriv; - brw->screen = screen; - brw->bufmgr = screen->bufmgr; - - brw->has_hiz = devinfo->has_hiz_and_separate_stencil; - brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil; - - /* We don't push UBOs on IVB and earlier because the restrictions on - * 3DSTATE_CONSTANT_* make it really annoying to use push constants - * without dynamic state base address. - */ - brw->can_push_ubos = devinfo->verx10 >= 75; - - brw->isl_dev = screen->isl_dev; - - brw->vs.base.stage = MESA_SHADER_VERTEX; - brw->tcs.base.stage = MESA_SHADER_TESS_CTRL; - brw->tes.base.stage = MESA_SHADER_TESS_EVAL; - brw->gs.base.stage = MESA_SHADER_GEOMETRY; - brw->wm.base.stage = MESA_SHADER_FRAGMENT; - brw->cs.base.stage = MESA_SHADER_COMPUTE; - - brw_init_driver_functions(brw, &functions); - - if (notify_reset) - functions.GetGraphicsResetStatus = brw_get_graphics_reset_status; - - brw_process_driconf_options(brw); - - if (api == API_OPENGL_CORE && - driQueryOptionb(&screen->optionCache, "force_compat_profile")) { - api = API_OPENGL_COMPAT; - } - - struct gl_context *ctx = &brw->ctx; - - if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) { - *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; - fprintf(stderr, "%s: failed to init mesa context\n", __func__); - brw_destroy_context(driContextPriv); - return false; - } - - driContextSetFlags(ctx, ctx_config->flags); - - /* Initialize the software rasterizer and helper modules. - * - * As of GL 3.1 core, the gfx4+ driver doesn't need the swrast context for - * software fallbacks (which we have to support on legacy GL to do weird - * glDrawPixels(), glBitmap(), and other functions). - */ - if (api != API_OPENGL_CORE && api != API_OPENGLES2) { - _swrast_CreateContext(ctx); - } - - _vbo_CreateContext(ctx, true); - if (ctx->swrast_context) { - _tnl_CreateContext(ctx); - TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; - _swsetup_CreateContext(ctx); - - /* Configure swrast to match hardware characteristics: */ - _swrast_allow_pixel_fog(ctx, false); - _swrast_allow_vertex_fog(ctx, true); - } - - _mesa_meta_init(ctx); - - if (INTEL_DEBUG(DEBUG_PERF)) - brw->perf_debug = true; - - brw_initialize_cs_context_constants(brw); - brw_initialize_context_constants(brw); - - ctx->Const.ResetStrategy = notify_reset - ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB; - - /* Reinitialize the context point state. It depends on ctx->Const values. 
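[The ctx_config->flags check above is the standard whitelist test: reject the context if any bit survives masking off the allowed set, which is what produces __DRI_CTX_ERROR_UNKNOWN_FLAG. A sketch with illustrative flag values, not the real __DRI_CTX_FLAG_* constants:]

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define FLAG_DEBUG      (1u << 0)   /* illustrative values only */
#define FLAG_FWD_COMPAT (1u << 1)
#define FLAG_ROBUST     (1u << 2)

static bool
flags_allowed(uint32_t flags, uint32_t allowed)
{
   return (flags & ~allowed) == 0;  /* no unknown bits set */
}

int
main(void)
{
   uint32_t allowed = FLAG_DEBUG | FLAG_FWD_COMPAT;
   assert(flags_allowed(FLAG_DEBUG, allowed));
   assert(!flags_allowed(FLAG_ROBUST, allowed)); /* -> UNKNOWN_FLAG */
   return 0;
}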
*/ - _mesa_init_point(ctx); - - brw_fbo_init(brw); - - brw_batch_init(brw); - - /* Create a new hardware context. Using a hardware context means that - * our GPU state will be saved/restored on context switch, allowing us - * to assume that the GPU is in the same state we left it in. - * - * This is required for transform feedback buffer offsets, query objects, - * and also allows us to reduce how much state we have to emit. - */ - brw->hw_ctx = brw_create_hw_context(brw->bufmgr); - if (!brw->hw_ctx && devinfo->ver >= 6) { - fprintf(stderr, "Failed to create hardware context.\n"); - brw_destroy_context(driContextPriv); - return false; - } - - if (brw->hw_ctx) { - int hw_priority = INTEL_CONTEXT_MEDIUM_PRIORITY; - if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) { - switch (ctx_config->priority) { - case __DRI_CTX_PRIORITY_LOW: - hw_priority = INTEL_CONTEXT_LOW_PRIORITY; - break; - case __DRI_CTX_PRIORITY_HIGH: - hw_priority = INTEL_CONTEXT_HIGH_PRIORITY; - break; - } - } - if (hw_priority != I915_CONTEXT_DEFAULT_PRIORITY && - brw_hw_context_set_priority(brw->bufmgr, brw->hw_ctx, hw_priority)) { - fprintf(stderr, - "Failed to set priority [%d:%d] for hardware context.\n", - ctx_config->priority, hw_priority); - brw_destroy_context(driContextPriv); - return false; - } - } - - if (brw_init_pipe_control(brw, devinfo)) { - *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; - brw_destroy_context(driContextPriv); - return false; - } - - brw_upload_init(&brw->upload, brw->bufmgr, 65536); - - brw_init_state(brw); - - brw_init_extensions(ctx); - - brw_init_surface_formats(brw); - - brw_blorp_init(brw); - - brw->urb.size = devinfo->urb.size; - - if (devinfo->ver == 6) - brw->urb.gs_present = false; - - brw->prim_restart.in_progress = false; - brw->prim_restart.enable_cut_index = false; - brw->gs.enabled = false; - brw->clip.viewport_count = 1; - - brw->predicate.state = BRW_PREDICATE_STATE_RENDER; - - brw->max_gtt_map_object_size = screen->max_gtt_map_object_size; - - ctx->VertexProgram._MaintainTnlProgram = true; - ctx->FragmentProgram._MaintainTexEnvProgram = true; - _mesa_reset_vertex_processing_mode(ctx); - - brw_draw_init( brw ); - - if ((ctx_config->flags & __DRI_CTX_FLAG_DEBUG) != 0) { - /* Turn on some extra GL_ARB_debug_output generation. */ - brw->perf_debug = true; - } - - if ((ctx_config->flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) { - ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB; - ctx->Const.RobustAccess = GL_TRUE; - } - - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) - brw_init_shader_time(brw); - - _mesa_override_extensions(ctx); - _mesa_compute_version(ctx); - -#ifndef NDEBUG - /* Enforce that the version of the context that was created is at least as - * high as the version that was advertised via GLX / EGL / whatever window - * system. 
- */ - const __DRIscreen *const dri_screen = brw->screen->driScrnPriv; - - switch (api) { - case API_OPENGL_COMPAT: - assert(ctx->Version >= dri_screen->max_gl_compat_version); - break; - case API_OPENGLES: - assert(ctx->Version >= dri_screen->max_gl_es1_version); - break; - case API_OPENGLES2: - assert(ctx->Version >= dri_screen->max_gl_es2_version); - break; - case API_OPENGL_CORE: - assert(ctx->Version >= dri_screen->max_gl_core_version); - break; - } -#endif - - /* GL_ARB_gl_spirv */ - if (ctx->Extensions.ARB_gl_spirv) { - brw_initialize_spirv_supported_capabilities(brw); - - if (ctx->Extensions.ARB_spirv_extensions) { - /* GL_ARB_spirv_extensions */ - ctx->Const.SpirVExtensions = MALLOC_STRUCT(spirv_supported_extensions); - _mesa_fill_supported_spirv_extensions(ctx->Const.SpirVExtensions, - &ctx->Const.SpirVCapabilities); - } - } - - _mesa_initialize_dispatch_tables(ctx); - _mesa_initialize_vbo_vtxfmt(ctx); - - if (ctx->Extensions.INTEL_performance_query) - brw_init_performance_queries(brw); - - brw->ctx.Cache = brw->screen->disk_cache; - - if (driContextPriv->driScreenPriv->dri2.backgroundCallable && - driQueryOptionb(&screen->optionCache, "mesa_glthread")) { - /* Loader supports multithreading, and so do we. */ - _mesa_glthread_init(ctx); - } - - return true; -} - -void -brw_destroy_context(__DRIcontext *driContextPriv) -{ - struct brw_context *brw = - (struct brw_context *) driContextPriv->driverPrivate; - struct gl_context *ctx = &brw->ctx; - - GET_CURRENT_CONTEXT(curctx); - - if (curctx == NULL) { - /* No current context, but we need one to release - * renderbuffer surface when we release framebuffer. - * So temporarily bind the context. - */ - _mesa_make_current(ctx, NULL, NULL); - } - - _mesa_glthread_destroy(&brw->ctx); - - _mesa_meta_free(&brw->ctx); - - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) { - /* Force a report. 
*/ - brw->shader_time.report_time = 0; - - brw_collect_and_report_shader_time(brw); - brw_destroy_shader_time(brw); - } - - blorp_finish(&brw->blorp); - - brw_destroy_state(brw); - brw_draw_destroy(brw); - - brw_bo_unreference(brw->curbe.curbe_bo); - - brw_bo_unreference(brw->vs.base.scratch_bo); - brw_bo_unreference(brw->tcs.base.scratch_bo); - brw_bo_unreference(brw->tes.base.scratch_bo); - brw_bo_unreference(brw->gs.base.scratch_bo); - brw_bo_unreference(brw->wm.base.scratch_bo); - - brw_bo_unreference(brw->vs.base.push_const_bo); - brw_bo_unreference(brw->tcs.base.push_const_bo); - brw_bo_unreference(brw->tes.base.push_const_bo); - brw_bo_unreference(brw->gs.base.push_const_bo); - brw_bo_unreference(brw->wm.base.push_const_bo); - - brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx); - - if (ctx->swrast_context) { - _swsetup_DestroyContext(&brw->ctx); - _tnl_DestroyContext(&brw->ctx); - } - _vbo_DestroyContext(&brw->ctx); - - if (ctx->swrast_context) - _swrast_DestroyContext(&brw->ctx); - - brw_fini_pipe_control(brw); - brw_batch_free(&brw->batch); - - brw_bo_unreference(brw->throttle_batch[1]); - brw_bo_unreference(brw->throttle_batch[0]); - brw->throttle_batch[1] = NULL; - brw->throttle_batch[0] = NULL; - - /* free the Mesa context */ - _mesa_free_context_data(&brw->ctx, true); - - ralloc_free(brw->mem_ctx); - align_free(brw); - driContextPriv->driverPrivate = NULL; -} - -GLboolean -brw_unbind_context(__DRIcontext *driContextPriv) -{ - struct gl_context *ctx = driContextPriv->driverPrivate; - _mesa_glthread_finish(ctx); - - /* Unset current context and dispath table */ - _mesa_make_current(NULL, NULL, NULL); - - return true; -} - -/** - * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior - * on window system framebuffers. - * - * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if - * your renderbuffer can do sRGB encode, and you can flip a switch that does - * sRGB encode if the renderbuffer can handle it. You can ask specifically - * for a visual where you're guaranteed to be capable, but it turns out that - * everyone just makes all their ARGB8888 visuals capable and doesn't offer - * incapable ones, because there's no difference between the two in resources - * used. Applications thus get built that accidentally rely on the default - * visual choice being sRGB, so we make ours sRGB capable. Everything sounds - * great... - * - * But for GLES2/3, they decided that it was silly to not turn on sRGB encode - * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent. - * So they removed the enable knob and made it "if the renderbuffer is sRGB - * capable, do sRGB encode". Then, for your window system renderbuffers, you - * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals - * and get no sRGB encode (assuming that both kinds of visual are available). - * Thus our choice to support sRGB by default on our visuals for desktop would - * result in broken rendering of GLES apps that aren't expecting sRGB encode. - * - * Unfortunately, renderbuffer setup happens before a context is created. So - * in brw_screen.c we always set up sRGB, and here, if you're a GLES2/3 - * context (without an sRGB visual), we go turn that back off before anyone - * finds out. 
- */ -static void -brw_gles3_srgb_workaround(struct brw_context *brw, struct gl_framebuffer *fb) -{ - struct gl_context *ctx = &brw->ctx; - - if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable) - return; - - for (int i = 0; i < BUFFER_COUNT; i++) { - struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer; - - /* Check if sRGB was specifically asked for. */ - struct brw_renderbuffer *irb = brw_get_renderbuffer(fb, i); - if (irb && irb->need_srgb) - return; - - if (rb) - rb->Format = _mesa_get_srgb_format_linear(rb->Format); - } - /* Disable sRGB from framebuffers that are not compatible. */ - fb->Visual.sRGBCapable = false; -} - -GLboolean -brw_make_current(__DRIcontext *driContextPriv, - __DRIdrawable *driDrawPriv, - __DRIdrawable *driReadPriv) -{ - struct brw_context *brw; - - if (driContextPriv) - brw = (struct brw_context *) driContextPriv->driverPrivate; - else - brw = NULL; - - if (driContextPriv) { - struct gl_context *ctx = &brw->ctx; - struct gl_framebuffer *fb, *readFb; - - if (driDrawPriv == NULL) { - fb = _mesa_get_incomplete_framebuffer(); - } else { - fb = driDrawPriv->driverPrivate; - driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1; - } - - if (driReadPriv == NULL) { - readFb = _mesa_get_incomplete_framebuffer(); - } else { - readFb = driReadPriv->driverPrivate; - driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1; - } - - /* The sRGB workaround changes the renderbuffer's format. We must change - * the format before the renderbuffer's miptree gets allocated, otherwise - * the formats of the renderbuffer and its miptree will differ. - */ - brw_gles3_srgb_workaround(brw, fb); - brw_gles3_srgb_workaround(brw, readFb); - - /* If the context viewport hasn't been initialized, force a call out to - * the loader to get buffers so we have a drawable size for the initial - * viewport. */ - if (!brw->ctx.ViewportInitialized) - brw_prepare_render(brw); - - _mesa_make_current(ctx, fb, readFb); - } else { - GET_CURRENT_CONTEXT(ctx); - _mesa_glthread_finish(ctx); - _mesa_make_current(NULL, NULL, NULL); - } - - return true; -} - -void -brw_resolve_for_dri2_flush(struct brw_context *brw, - __DRIdrawable *drawable) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->ver < 6) { - /* MSAA and fast color clear are not supported, so don't waste time - * checking whether a resolve is needed. - */ - return; - } - - struct gl_framebuffer *fb = drawable->driverPrivate; - struct brw_renderbuffer *rb; - - /* Usually, only the back buffer will need to be downsampled. However, - * the front buffer will also need it if the user has rendered into it. - */ - static const gl_buffer_index buffers[2] = { - BUFFER_BACK_LEFT, - BUFFER_FRONT_LEFT, - }; - - for (int i = 0; i < 2; ++i) { - rb = brw_get_renderbuffer(fb, buffers[i]); - if (rb == NULL || rb->mt == NULL) - continue; - if (rb->mt->surf.samples == 1) { - assert(rb->mt_layer == 0 && rb->mt_level == 0 && - rb->layer_count == 1); - brw_miptree_prepare_external(brw, rb->mt); - } else { - brw_renderbuffer_downsample(brw, rb); - - /* Call prepare_external on the single-sample miptree to do any - * needed resolves prior to handing it off to the window system. - * This is needed in the case that rb->singlesample_mt is Y-tiled - * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E. In - * this case, the MSAA resolve above will write compressed data into - * rb->singlesample_mt. 
- *
- * TODO: Some day, if we decide to care about the tiny performance
- * hit we're taking by doing the MSAA resolve and then a CCS resolve,
- * we could detect this case and just allocate the single-sampled
- * miptree without aux. However, that would be a lot of plumbing and
- * this is a rather exotic case so it's not really worth it.
- */
-         brw_miptree_prepare_external(brw, rb->singlesample_mt);
-      }
-   }
-}
-
-static unsigned
-brw_bits_per_pixel(const struct brw_renderbuffer *rb)
-{
-   return _mesa_get_format_bytes(brw_rb_format(rb)) * 8;
-}
-
-static void
-brw_query_dri2_buffers(struct brw_context *brw,
-                       __DRIdrawable *drawable,
-                       __DRIbuffer **buffers,
-                       int *count);
-
-static void
-brw_process_dri2_buffer(struct brw_context *brw,
-                        __DRIdrawable *drawable,
-                        __DRIbuffer *buffer,
-                        struct brw_renderbuffer *rb,
-                        const char *buffer_name);
-
-static void
-brw_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
-
-static void
-brw_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
-{
-   struct gl_framebuffer *fb = drawable->driverPrivate;
-   struct brw_renderbuffer *rb;
-   __DRIbuffer *buffers = NULL;
-   int count;
-   const char *region_name;
-
-   /* Set this up front, so that in case our buffers get invalidated
-    * while we're getting new buffers, we don't clobber the stamp and
-    * thus ignore the invalidate. */
-   drawable->lastStamp = drawable->dri2.stamp;
-
-   if (INTEL_DEBUG(DEBUG_DRI))
-      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
-
-   brw_query_dri2_buffers(brw, drawable, &buffers, &count);
-
-   if (buffers == NULL)
-      return;
-
-   for (int i = 0; i < count; i++) {
-      switch (buffers[i].attachment) {
-      case __DRI_BUFFER_FRONT_LEFT:
-         rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
-         region_name = "dri2 front buffer";
-         break;
-
-      case __DRI_BUFFER_FAKE_FRONT_LEFT:
-         rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
-         region_name = "dri2 fake front buffer";
-         break;
-
-      case __DRI_BUFFER_BACK_LEFT:
-         rb = brw_get_renderbuffer(fb, BUFFER_BACK_LEFT);
-         region_name = "dri2 back buffer";
-         break;
-
-      case __DRI_BUFFER_DEPTH:
-      case __DRI_BUFFER_HIZ:
-      case __DRI_BUFFER_DEPTH_STENCIL:
-      case __DRI_BUFFER_STENCIL:
-      case __DRI_BUFFER_ACCUM:
-      default:
-         fprintf(stderr,
-                 "unhandled buffer attach event, attachment type %d\n",
-                 buffers[i].attachment);
-         return;
-      }
-
-      brw_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
-   }
-
-}
-
-void
-brw_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
-{
-   struct brw_context *brw = context->driverPrivate;
-   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
-
-   /* Set this up front, so that in case our buffers get invalidated
-    * while we're getting new buffers, we don't clobber the stamp and
-    * thus ignore the invalidate. */
-   drawable->lastStamp = drawable->dri2.stamp;
-
-   if (INTEL_DEBUG(DEBUG_DRI))
-      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
-
-   if (dri_screen->image.loader)
-      brw_update_image_buffers(brw, drawable);
-   else
-      brw_update_dri2_buffers(brw, drawable);
-
-   driUpdateFramebufferSize(&brw->ctx, drawable);
-}
-
-/**
- * brw_prepare_render should be called anywhere that current read/drawbuffer
- * state is required.
- */ -void -brw_prepare_render(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - __DRIcontext *driContext = brw->driContext; - __DRIdrawable *drawable; - - drawable = driContext->driDrawablePriv; - if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) { - if (drawable->lastStamp != drawable->dri2.stamp) - brw_update_renderbuffers(driContext, drawable); - driContext->dri2.draw_stamp = drawable->dri2.stamp; - } - - drawable = driContext->driReadablePriv; - if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) { - if (drawable->lastStamp != drawable->dri2.stamp) - brw_update_renderbuffers(driContext, drawable); - driContext->dri2.read_stamp = drawable->dri2.stamp; - } - - /* If we're currently rendering to the front buffer, the rendering - * that will happen next will probably dirty the front buffer. So - * mark it as dirty here. - */ - if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer) && - ctx->DrawBuffer != _mesa_get_incomplete_framebuffer()) { - brw->front_buffer_dirty = true; - } - - if (brw->is_shared_buffer_bound) { - /* Subsequent rendering will probably dirty the shared buffer. */ - brw->is_shared_buffer_dirty = true; - } -} - -/** - * \brief Query DRI2 to obtain a DRIdrawable's buffers. - * - * To determine which DRI buffers to request, examine the renderbuffers - * attached to the drawable's framebuffer. Then request the buffers with - * DRI2GetBuffers() or DRI2GetBuffersWithFormat(). - * - * This is called from brw_update_renderbuffers(). - * - * \param drawable Drawable whose buffers are queried. - * \param buffers [out] List of buffers returned by DRI2 query. - * \param buffer_count [out] Number of buffers returned. - * - * \see brw_update_renderbuffers() - * \see DRI2GetBuffers() - * \see DRI2GetBuffersWithFormat() - */ -static void -brw_query_dri2_buffers(struct brw_context *brw, - __DRIdrawable *drawable, - __DRIbuffer **buffers, - int *buffer_count) -{ - __DRIscreen *dri_screen = brw->screen->driScrnPriv; - struct gl_framebuffer *fb = drawable->driverPrivate; - int i = 0; - unsigned attachments[__DRI_BUFFER_COUNT]; - - struct brw_renderbuffer *front_rb; - struct brw_renderbuffer *back_rb; - - front_rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT); - back_rb = brw_get_renderbuffer(fb, BUFFER_BACK_LEFT); - - memset(attachments, 0, sizeof(attachments)); - if ((_mesa_is_front_buffer_drawing(fb) || - _mesa_is_front_buffer_reading(fb) || - !back_rb) && front_rb) { - /* If a fake front buffer is in use, then querying for - * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from - * the real front buffer to the fake front buffer. So before doing the - * query, we need to make sure all the pending drawing has landed in the - * real front buffer. - */ - brw_batch_flush(brw); - brw_flush_front(&brw->ctx); - - attachments[i++] = __DRI_BUFFER_FRONT_LEFT; - attachments[i++] = brw_bits_per_pixel(front_rb); - } else if (front_rb && brw->front_buffer_dirty) { - /* We have pending front buffer rendering, but we aren't querying for a - * front buffer. If the front buffer we have is a fake front buffer, - * the X server is going to throw it away when it processes the query. - * So before doing the query, make sure all the pending drawing has - * landed in the real front buffer. 
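
(For readers tracing the invalidation flow above: a minimal, self-contained sketch of
the stamp protocol, pulled out of the driver for illustration. The field names mirror
the ones used in brw_prepare_render(); the function name is hypothetical.)

    /* The loader bumps drawable->dri2.stamp on each invalidate. The driver
     * keeps two copies of the last stamp it handled, one per drawable
     * (lastStamp) and one per context (draw_stamp/read_stamp), so buffers
     * are re-queried at most once per invalidate even with many contexts.
     */
    static void
    example_check_invalidate(__DRIcontext *ctx, __DRIdrawable *d)
    {
       if (d->dri2.stamp != ctx->dri2.draw_stamp) {
          if (d->lastStamp != d->dri2.stamp)
             brw_update_renderbuffers(ctx, d);
          ctx->dri2.draw_stamp = d->dri2.stamp;
       }
    }
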
-       */
-      brw_batch_flush(brw);
-      brw_flush_front(&brw->ctx);
-   }
-
-   if (back_rb) {
-      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
-      attachments[i++] = brw_bits_per_pixel(back_rb);
-   }
-
-   assert(i <= ARRAY_SIZE(attachments));
-
-   *buffers =
-      dri_screen->dri2.loader->getBuffersWithFormat(drawable,
-                                                    &drawable->w,
-                                                    &drawable->h,
-                                                    attachments, i / 2,
-                                                    buffer_count,
-                                                    drawable->loaderPrivate);
-}
-
-/**
- * \brief Assign a DRI buffer's DRM region to a renderbuffer.
- *
- * This is called from brw_update_renderbuffers().
- *
- * \par Note:
- *    DRI buffers whose attachment point is DRI2BufferStencil or
- *    DRI2BufferDepthStencil are handled as special cases.
- *
- * \param buffer_name is a human-readable name, such as "dri2 front buffer",
- *        that is passed to brw_bo_gem_create_from_name().
- *
- * \see brw_update_renderbuffers()
- */
-static void
-brw_process_dri2_buffer(struct brw_context *brw,
-                        __DRIdrawable *drawable,
-                        __DRIbuffer *buffer,
-                        struct brw_renderbuffer *rb,
-                        const char *buffer_name)
-{
-   struct gl_framebuffer *fb = drawable->driverPrivate;
-   struct brw_bo *bo;
-
-   if (!rb)
-      return;
-
-   unsigned num_samples = rb->Base.Base.NumSamples;
-
-   /* We try to avoid closing and reopening the same BO name, because the first
-    * use of a mapping of the buffer involves a bunch of page faulting which is
-    * moderately expensive.
-    */
-   struct brw_mipmap_tree *last_mt;
-   if (num_samples == 0)
-      last_mt = rb->mt;
-   else
-      last_mt = rb->singlesample_mt;
-
-   uint32_t old_name = 0;
-   if (last_mt) {
-      /* The bo already has a name because the miptree was created by a
-       * previous call to brw_process_dri2_buffer(). If a bo already has a
-       * name, then brw_bo_flink() is a low-cost getter. It does not
-       * create a new name.
-       */
-      brw_bo_flink(last_mt->bo, &old_name);
-   }
-
-   if (old_name == buffer->name)
-      return;
-
-   if (INTEL_DEBUG(DEBUG_DRI)) {
-      fprintf(stderr,
-              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
-              buffer->name, buffer->attachment,
-              buffer->cpp, buffer->pitch);
-   }
-
-   bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
-                                    buffer->name);
-   if (!bo) {
-      fprintf(stderr,
-              "Failed to open BO for returned DRI2 buffer "
-              "(%dx%d, %s, named %d).\n"
-              "This is likely a bug in the X Server that will lead to a "
-              "crash soon.\n",
-              drawable->w, drawable->h, buffer_name, buffer->name);
-      return;
-   }
-
-   uint32_t tiling, swizzle;
-   brw_bo_get_tiling(bo, &tiling, &swizzle);
-
-   struct brw_mipmap_tree *mt =
-      brw_miptree_create_for_bo(brw,
-                                bo,
-                                brw_rb_format(rb),
-                                0,
-                                drawable->w,
-                                drawable->h,
-                                1,
-                                buffer->pitch,
-                                isl_tiling_from_i915_tiling(tiling),
-                                MIPTREE_CREATE_DEFAULT);
-   if (!mt) {
-      brw_bo_unreference(bo);
-      return;
-   }
-
-   /* We got this BO from X11. We can't assume that we have coherent texture
-    * access because X may suddenly decide to use it for scan-out which would
-    * destroy coherency.
-    */
-   bo->cache_coherent = false;
-
-   if (!brw_update_winsys_renderbuffer_miptree(brw, rb, mt,
-                                               drawable->w, drawable->h,
-                                               buffer->pitch)) {
-      brw_bo_unreference(bo);
-      brw_miptree_release(&mt);
-      return;
-   }
-
-   if (_mesa_is_front_buffer_drawing(fb) &&
-       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
-        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
-       rb->Base.Base.NumSamples > 1) {
-      brw_renderbuffer_upsample(brw, rb);
-   }
-
-   assert(rb->mt);
-
-   brw_bo_unreference(bo);
-}
-
-/**
- * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
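
(To make the attachment-list encoding used by getBuffersWithFormat() above concrete:
the array interleaves (attachment, bits-per-pixel) pairs, and the count handed to the
loader is the number of pairs, not the number of array entries. A hypothetical
double-buffered ARGB8888 drawable with no front-buffer access would build:)

    unsigned attachments[] = {
       __DRI_BUFFER_BACK_LEFT, 32,   /* one (attachment, bpp) pair */
    };
    /* i == 2 here, so the loader is asked for i / 2 == 1 buffer. */
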
- *
- * To determine which DRI buffers to request, examine the renderbuffers
- * attached to the drawable's framebuffer. Then request the buffers from
- * the image loader.
- *
- * This is called from brw_update_renderbuffers().
- *
- * \param drawable      Drawable whose buffers are queried.
- * \param buffers       [out] List of buffers returned by DRI2 query.
- * \param buffer_count  [out] Number of buffers returned.
- *
- * \see brw_update_renderbuffers()
- */
-
-static void
-brw_update_image_buffer(struct brw_context *intel,
-                        __DRIdrawable *drawable,
-                        struct brw_renderbuffer *rb,
-                        __DRIimage *buffer,
-                        enum __DRIimageBufferMask buffer_type)
-{
-   struct gl_framebuffer *fb = drawable->driverPrivate;
-
-   if (!rb || !buffer->bo)
-      return;
-
-   unsigned num_samples = rb->Base.Base.NumSamples;
-
-   /* Check and see if we're already bound to the right
-    * buffer object
-    */
-   struct brw_mipmap_tree *last_mt;
-   if (num_samples == 0)
-      last_mt = rb->mt;
-   else
-      last_mt = rb->singlesample_mt;
-
-   if (last_mt && last_mt->bo == buffer->bo) {
-      if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
-         brw_miptree_make_shareable(intel, last_mt);
-      }
-      return;
-   }
-
-   /* Only allow internal compression if samples == 0. For multisampled
-    * window system buffers, the only thing the single-sampled buffer is used
-    * for is as a resolve target. If we do any compression beyond what is
-    * supported by the window system, we will just have to resolve so it's
-    * probably better to just not bother.
-    */
-   const bool allow_internal_aux = (num_samples == 0);
-
-   struct brw_mipmap_tree *mt =
-      brw_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
-                                       brw_rb_format(rb),
-                                       allow_internal_aux);
-   if (!mt)
-      return;
-
-   if (!brw_update_winsys_renderbuffer_miptree(intel, rb, mt,
-                                               buffer->width, buffer->height,
-                                               buffer->pitch)) {
-      brw_miptree_release(&mt);
-      return;
-   }
-
-   if (_mesa_is_front_buffer_drawing(fb) &&
-       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
-       rb->Base.Base.NumSamples > 1) {
-      brw_renderbuffer_upsample(intel, rb);
-   }
-
-   if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
-      /* The compositor and the application may access this image
-       * concurrently. The display hardware may even scanout the image while
-       * the GPU is rendering to it. Aux surfaces cause difficulty with
-       * concurrent access, so permanently disable aux for this miptree.
-       *
-       * Perhaps we could improve overall application performance by
-       * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
-       * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
-       * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
-       * approach to be highly dependent on the application's GL usage.
-       *
-       * I [chadv] expect clever disabling/reenabling to be counterproductive
-       * in the use cases I care about: applications that render nearly
-       * realtime handwriting to the surface while possibly undergoing
-       * simultaneous scanout as a display plane. The app requires low
-       * render latency. Even though the app spends most of its time in
-       * shared-buffer mode, it also frequently transitions between
-       * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
-       * mode. Visual stutter during the transitions should be avoided.
-       *
-       * In this case, I [chadv] believe reducing the GPU workload at
-       * shared-buffer/double-buffer transitions would offer a smoother app
-       * experience than any savings due to aux compression. But I've
-       * collected no data to prove my theory.
- */ - brw_miptree_make_shareable(intel, mt); - } -} - -static void -brw_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable) -{ - struct gl_framebuffer *fb = drawable->driverPrivate; - __DRIscreen *dri_screen = brw->screen->driScrnPriv; - struct brw_renderbuffer *front_rb; - struct brw_renderbuffer *back_rb; - struct __DRIimageList images; - mesa_format format; - uint32_t buffer_mask = 0; - int ret; - - front_rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT); - back_rb = brw_get_renderbuffer(fb, BUFFER_BACK_LEFT); - - if (back_rb) - format = brw_rb_format(back_rb); - else if (front_rb) - format = brw_rb_format(front_rb); - else - return; - - if (front_rb && (_mesa_is_front_buffer_drawing(fb) || - _mesa_is_front_buffer_reading(fb) || !back_rb)) { - buffer_mask |= __DRI_IMAGE_BUFFER_FRONT; - } - - if (back_rb) - buffer_mask |= __DRI_IMAGE_BUFFER_BACK; - - ret = dri_screen->image.loader->getBuffers(drawable, - driGLFormatToImageFormat(format), - &drawable->dri2.stamp, - drawable->loaderPrivate, - buffer_mask, - &images); - if (!ret) - return; - - if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) { - drawable->w = images.front->width; - drawable->h = images.front->height; - brw_update_image_buffer(brw, drawable, front_rb, images.front, - __DRI_IMAGE_BUFFER_FRONT); - } - - if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) { - drawable->w = images.back->width; - drawable->h = images.back->height; - brw_update_image_buffer(brw, drawable, back_rb, images.back, - __DRI_IMAGE_BUFFER_BACK); - } - - if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) { - assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED); - drawable->w = images.back->width; - drawable->h = images.back->height; - brw_update_image_buffer(brw, drawable, back_rb, images.back, - __DRI_IMAGE_BUFFER_SHARED); - brw->is_shared_buffer_bound = true; - } else { - brw->is_shared_buffer_bound = false; - brw->is_shared_buffer_dirty = false; - } -} diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h deleted file mode 100644 index 2061fb2..0000000 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ /dev/null @@ -1,1637 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell <keithw@vmware.com>
-  */
-
-
-#ifndef BRWCONTEXT_INC
-#define BRWCONTEXT_INC
-
-#include <stdbool.h>
-#include "main/macros.h"
-#include "main/mtypes.h"
-#include "main/errors.h"
-#include "brw_structs.h"
-#include "brw_pipe_control.h"
-#include "compiler/brw_compiler.h"
-
-#include "isl/isl.h"
-#include "blorp/blorp.h"
-
-#include <brw_bufmgr.h>
-
-#include "dev/intel_debug.h"
-#include "common/intel_decoder.h"
-#include "brw_screen.h"
-#include "brw_tex_obj.h"
-#include "perf/intel_perf.h"
-#include "perf/intel_perf_query.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-/* Glossary:
- *
- * URB - uniform resource buffer. A mid-sized buffer which is
- * partitioned between the fixed function units and used for passing
- * values (vertices, primitives, constants) between them.
- *
- * CURBE - constant URB entry. An urb region (entry) used to hold
- * constant values which the fixed function units can be instructed to
- * preload into the GRF when spawning a thread.
- *
- * VUE - vertex URB entry. An urb entry holding a vertex and usually
- * a vertex header. The header contains control information and
- * things like primitive type, Begin/end flags and clip codes.
- *
- * PUE - primitive URB entry. An urb entry produced by the setup (SF)
- * unit holding rasterization and interpolation parameters.
- *
- * GRF - general register file. One of several register files
- * addressable by programmed threads. The inputs (r0, payload, curbe,
- * urb) of the thread are preloaded to this area before the thread is
- * spawned. The registers are individually 8 dwords wide and suitable
- * for general usage. Registers holding thread input values are not
- * special and may be overwritten.
- *
- * MRF - message register file. Threads communicate (and terminate)
- * by sending messages. Message parameters are placed in contiguous
- * MRF registers. All program output is via these messages. URB
- * entries are populated by sending a message to the shared URB
- * function containing the new data, together with a control word,
- * often an unmodified copy of R0.
- *
- * R0 - GRF register 0. Typically holds control information used when
- * sending messages to other threads.
- *
- * EU or GFX4 EU: The name of the programmable subsystem of the
- * i965 hardware. Threads are executed by the EU, the registers
- * described above are part of the EU architecture.
- *
- * Fixed function units:
- *
- * CS - Command streamer. Notional first unit, little software
- * interaction. Holds the URB entries used for constant data, i.e. the
- * CURBEs.
- *
- * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of
- * this unit is responsible for pulling vertices out of vertex buffers
- * in vram and injecting them into the processing pipe as VUEs. If
- * enabled, it first passes them to a VS thread which is a good place
- * for the driver to implement any active vertex shader.
- *
- * HS - Hull Shader (Tessellation Control Shader)
- *
- * TE - Tessellation Engine (Tessellation Primitive Generation)
- *
- * DS - Domain Shader (Tessellation Evaluation Shader)
- *
- * GS - Geometry Shader. This corresponds to a new DX10 concept. If
- * enabled, incoming strips etc are passed to GS threads in individual
- * line/triangle/point units. The GS thread may perform arbitrary
- * computation and emit whatever primitives with whatever vertices it
- * chooses.
- * This makes GS an excellent place to implement GL's
- * unfilled polygon modes, though of course it is capable of much
- * more. Additionally, GS is used to translate away primitives not
- * handled by later units, including Quads and Lineloops.
- *
- * CS - Clipper. Mesa's clipping algorithms are imported to run on
- * this unit. The fixed function part performs clip testing against
- * the 6 fixed clipplanes and makes decisions on whether or not the
- * incoming primitive needs to be passed to a thread for clipping.
- * User clip planes are handled via cooperation with the VS thread.
- *
- * SF - Strips Fans or Setup: Triangles are prepared for
- * rasterization. Interpolation coefficients are calculated.
- * Flatshading and two-sided lighting usually performed here.
- *
- * WM - Windower. Interpolation of vertex attributes performed here.
- * Fragment shader implemented here. SIMD aspects of EU taken full
- * advantage of, as pixels are processed in blocks of 16.
- *
- * CC - Color Calculator. No EU threads associated with this unit.
- * Handles blending and (presumably) depth and stencil testing.
- */
-
-struct brw_context;
-struct brw_inst;
-struct brw_vs_prog_key;
-struct brw_vue_prog_key;
-struct brw_wm_prog_key;
-struct brw_wm_prog_data;
-struct brw_cs_prog_key;
-struct brw_cs_prog_data;
-struct brw_label;
-
-enum brw_pipeline {
-   BRW_RENDER_PIPELINE,
-   BRW_COMPUTE_PIPELINE,
-
-   BRW_NUM_PIPELINES
-};
-
-enum brw_cache_id {
-   BRW_CACHE_FS_PROG,
-   BRW_CACHE_BLORP_PROG,
-   BRW_CACHE_SF_PROG,
-   BRW_CACHE_VS_PROG,
-   BRW_CACHE_FF_GS_PROG,
-   BRW_CACHE_GS_PROG,
-   BRW_CACHE_TCS_PROG,
-   BRW_CACHE_TES_PROG,
-   BRW_CACHE_CLIP_PROG,
-   BRW_CACHE_CS_PROG,
-
-   BRW_MAX_CACHE
-};
-
-enum gfx9_astc5x5_wa_tex_type {
-   GFX9_ASTC5X5_WA_TEX_TYPE_ASTC5x5 = 1 << 0,
-   GFX9_ASTC5X5_WA_TEX_TYPE_AUX = 1 << 1,
-};
-
-enum brw_state_id {
-   /* brw_cache_ids must come first - see brw_program_cache.c */
-   BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
-   BRW_STATE_FRAGMENT_PROGRAM,
-   BRW_STATE_GEOMETRY_PROGRAM,
-   BRW_STATE_TESS_PROGRAMS,
-   BRW_STATE_VERTEX_PROGRAM,
-   BRW_STATE_REDUCED_PRIMITIVE,
-   BRW_STATE_PATCH_PRIMITIVE,
-   BRW_STATE_PRIMITIVE,
-   BRW_STATE_CONTEXT,
-   BRW_STATE_PSP,
-   BRW_STATE_SURFACES,
-   BRW_STATE_BINDING_TABLE_POINTERS,
-   BRW_STATE_INDICES,
-   BRW_STATE_VERTICES,
-   BRW_STATE_DEFAULT_TESS_LEVELS,
-   BRW_STATE_BATCH,
-   BRW_STATE_INDEX_BUFFER,
-   BRW_STATE_VS_CONSTBUF,
-   BRW_STATE_TCS_CONSTBUF,
-   BRW_STATE_TES_CONSTBUF,
-   BRW_STATE_GS_CONSTBUF,
-   BRW_STATE_PROGRAM_CACHE,
-   BRW_STATE_STATE_BASE_ADDRESS,
-   BRW_STATE_VUE_MAP_GEOM_OUT,
-   BRW_STATE_TRANSFORM_FEEDBACK,
-   BRW_STATE_RASTERIZER_DISCARD,
-   BRW_STATE_STATS_WM,
-   BRW_STATE_UNIFORM_BUFFER,
-   BRW_STATE_IMAGE_UNITS,
-   BRW_STATE_META_IN_PROGRESS,
-   BRW_STATE_PUSH_CONSTANT_ALLOCATION,
-   BRW_STATE_NUM_SAMPLES,
-   BRW_STATE_TEXTURE_BUFFER,
-   BRW_STATE_GFX4_UNIT_STATE,
-   BRW_STATE_CC_VP,
-   BRW_STATE_SF_VP,
-   BRW_STATE_CLIP_VP,
-   BRW_STATE_SAMPLER_STATE_TABLE,
-   BRW_STATE_VS_ATTRIB_WORKAROUNDS,
-   BRW_STATE_COMPUTE_PROGRAM,
-   BRW_STATE_CS_WORK_GROUPS,
-   BRW_STATE_URB_SIZE,
-   BRW_STATE_CC_STATE,
-   BRW_STATE_BLORP,
-   BRW_STATE_VIEWPORT_COUNT,
-   BRW_STATE_CONSERVATIVE_RASTERIZATION,
-   BRW_STATE_DRAW_CALL,
-   BRW_STATE_AUX,
-   BRW_NUM_STATE_BITS
-};
-
-/**
- * BRW_NEW_*_PROG_DATA and BRW_NEW_*_PROGRAM are similar, but distinct.
- *
- * BRW_NEW_*_PROGRAM relates to the gl_shader_program/gl_program structures.
- * When the currently bound shader program differs from the previous draw
- * call, these will be flagged.
- * They cover brw->{stage}_program and ctx->{Stage}Program->_Current.
- *
- * BRW_NEW_*_PROG_DATA is flagged when the effective shaders change, from a
- * driver perspective. Even if the same shader is bound at the API level,
- * we may need to switch between multiple versions of that shader to handle
- * changes in non-orthogonal state.
- *
- * Additionally, multiple shader programs may have identical vertex shaders
- * (for example), or compile down to the same code in the backend. We combine
- * those into a single program cache entry.
- *
- * BRW_NEW_*_PROG_DATA occurs when switching program cache entries, which
- * covers the brw_*_prog_data structures, and brw->*.prog_offset.
- */
-#define BRW_NEW_FS_PROG_DATA (1ull << BRW_CACHE_FS_PROG)
-/* XXX: The BRW_NEW_BLORP_BLIT_PROG_DATA dirty bit is unused (as BLORP doesn't
- * use the normal state upload paths), but the cache is still used. To avoid
- * polluting the brw_program_cache code with special cases, we retain the
- * dirty bit for now. It should eventually be removed.
- */
-#define BRW_NEW_BLORP_BLIT_PROG_DATA (1ull << BRW_CACHE_BLORP_PROG)
-#define BRW_NEW_SF_PROG_DATA (1ull << BRW_CACHE_SF_PROG)
-#define BRW_NEW_VS_PROG_DATA (1ull << BRW_CACHE_VS_PROG)
-#define BRW_NEW_FF_GS_PROG_DATA (1ull << BRW_CACHE_FF_GS_PROG)
-#define BRW_NEW_GS_PROG_DATA (1ull << BRW_CACHE_GS_PROG)
-#define BRW_NEW_TCS_PROG_DATA (1ull << BRW_CACHE_TCS_PROG)
-#define BRW_NEW_TES_PROG_DATA (1ull << BRW_CACHE_TES_PROG)
-#define BRW_NEW_CLIP_PROG_DATA (1ull << BRW_CACHE_CLIP_PROG)
-#define BRW_NEW_CS_PROG_DATA (1ull << BRW_CACHE_CS_PROG)
-#define BRW_NEW_URB_FENCE (1ull << BRW_STATE_URB_FENCE)
-#define BRW_NEW_FRAGMENT_PROGRAM (1ull << BRW_STATE_FRAGMENT_PROGRAM)
-#define BRW_NEW_GEOMETRY_PROGRAM (1ull << BRW_STATE_GEOMETRY_PROGRAM)
-#define BRW_NEW_TESS_PROGRAMS (1ull << BRW_STATE_TESS_PROGRAMS)
-#define BRW_NEW_VERTEX_PROGRAM (1ull << BRW_STATE_VERTEX_PROGRAM)
-#define BRW_NEW_REDUCED_PRIMITIVE (1ull << BRW_STATE_REDUCED_PRIMITIVE)
-#define BRW_NEW_PATCH_PRIMITIVE (1ull << BRW_STATE_PATCH_PRIMITIVE)
-#define BRW_NEW_PRIMITIVE (1ull << BRW_STATE_PRIMITIVE)
-#define BRW_NEW_CONTEXT (1ull << BRW_STATE_CONTEXT)
-#define BRW_NEW_PSP (1ull << BRW_STATE_PSP)
-#define BRW_NEW_SURFACES (1ull << BRW_STATE_SURFACES)
-#define BRW_NEW_BINDING_TABLE_POINTERS (1ull << BRW_STATE_BINDING_TABLE_POINTERS)
-#define BRW_NEW_INDICES (1ull << BRW_STATE_INDICES)
-#define BRW_NEW_VERTICES (1ull << BRW_STATE_VERTICES)
-#define BRW_NEW_DEFAULT_TESS_LEVELS (1ull << BRW_STATE_DEFAULT_TESS_LEVELS)
-/**
- * Used for any batch entry with a relocated pointer that will be used
- * by any 3D rendering.
- */ -#define BRW_NEW_BATCH (1ull << BRW_STATE_BATCH) -/** \see brw.state.depth_region */ -#define BRW_NEW_INDEX_BUFFER (1ull << BRW_STATE_INDEX_BUFFER) -#define BRW_NEW_VS_CONSTBUF (1ull << BRW_STATE_VS_CONSTBUF) -#define BRW_NEW_TCS_CONSTBUF (1ull << BRW_STATE_TCS_CONSTBUF) -#define BRW_NEW_TES_CONSTBUF (1ull << BRW_STATE_TES_CONSTBUF) -#define BRW_NEW_GS_CONSTBUF (1ull << BRW_STATE_GS_CONSTBUF) -#define BRW_NEW_PROGRAM_CACHE (1ull << BRW_STATE_PROGRAM_CACHE) -#define BRW_NEW_STATE_BASE_ADDRESS (1ull << BRW_STATE_STATE_BASE_ADDRESS) -#define BRW_NEW_VUE_MAP_GEOM_OUT (1ull << BRW_STATE_VUE_MAP_GEOM_OUT) -#define BRW_NEW_VIEWPORT_COUNT (1ull << BRW_STATE_VIEWPORT_COUNT) -#define BRW_NEW_TRANSFORM_FEEDBACK (1ull << BRW_STATE_TRANSFORM_FEEDBACK) -#define BRW_NEW_RASTERIZER_DISCARD (1ull << BRW_STATE_RASTERIZER_DISCARD) -#define BRW_NEW_STATS_WM (1ull << BRW_STATE_STATS_WM) -#define BRW_NEW_UNIFORM_BUFFER (1ull << BRW_STATE_UNIFORM_BUFFER) -#define BRW_NEW_IMAGE_UNITS (1ull << BRW_STATE_IMAGE_UNITS) -#define BRW_NEW_META_IN_PROGRESS (1ull << BRW_STATE_META_IN_PROGRESS) -#define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1ull << BRW_STATE_PUSH_CONSTANT_ALLOCATION) -#define BRW_NEW_NUM_SAMPLES (1ull << BRW_STATE_NUM_SAMPLES) -#define BRW_NEW_TEXTURE_BUFFER (1ull << BRW_STATE_TEXTURE_BUFFER) -#define BRW_NEW_GFX4_UNIT_STATE (1ull << BRW_STATE_GFX4_UNIT_STATE) -#define BRW_NEW_CC_VP (1ull << BRW_STATE_CC_VP) -#define BRW_NEW_SF_VP (1ull << BRW_STATE_SF_VP) -#define BRW_NEW_CLIP_VP (1ull << BRW_STATE_CLIP_VP) -#define BRW_NEW_SAMPLER_STATE_TABLE (1ull << BRW_STATE_SAMPLER_STATE_TABLE) -#define BRW_NEW_VS_ATTRIB_WORKAROUNDS (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS) -#define BRW_NEW_COMPUTE_PROGRAM (1ull << BRW_STATE_COMPUTE_PROGRAM) -#define BRW_NEW_CS_WORK_GROUPS (1ull << BRW_STATE_CS_WORK_GROUPS) -#define BRW_NEW_URB_SIZE (1ull << BRW_STATE_URB_SIZE) -#define BRW_NEW_CC_STATE (1ull << BRW_STATE_CC_STATE) -#define BRW_NEW_BLORP (1ull << BRW_STATE_BLORP) -#define BRW_NEW_CONSERVATIVE_RASTERIZATION (1ull << BRW_STATE_CONSERVATIVE_RASTERIZATION) -#define BRW_NEW_DRAW_CALL (1ull << BRW_STATE_DRAW_CALL) -#define BRW_NEW_AUX_STATE (1ull << BRW_STATE_AUX) - -struct brw_state_flags { - /** State update flags signalled by mesa internals */ - GLuint mesa; - /** - * State update flags signalled as the result of brw_tracked_state updates - */ - uint64_t brw; -}; - - -/** Subclass of Mesa program */ -struct brw_program { - struct gl_program program; - GLuint id; - - bool compiled_once; -}; - -/** Number of texture sampler units */ -#define BRW_MAX_TEX_UNIT 32 - -/** Max number of UBOs in a shader */ -#define BRW_MAX_UBO 14 - -/** Max number of SSBOs in a shader */ -#define BRW_MAX_SSBO 12 - -/** Max number of atomic counter buffer objects in a shader */ -#define BRW_MAX_ABO 16 - -/** Max number of image uniforms in a shader */ -#define BRW_MAX_IMAGES 32 - -/** Maximum number of actual buffers used for stream output */ -#define BRW_MAX_SOL_BUFFERS 4 - -#define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + \ - BRW_MAX_TEX_UNIT * 2 + /* normal, gather */ \ - BRW_MAX_UBO + \ - BRW_MAX_SSBO + \ - BRW_MAX_ABO + \ - BRW_MAX_IMAGES + \ - 2 + /* shader time, pull constants */ \ - 1 /* cs num work groups */) - -struct brw_cache { - struct brw_context *brw; - - struct brw_cache_item **items; - struct brw_bo *bo; - void *map; - GLuint size, n_items; - - uint32_t next_offset; -}; - -#define perf_debug(...) 
do { \ - static GLuint msg_id = 0; \ - if (INTEL_DEBUG(DEBUG_PERF)) \ - dbg_printf(__VA_ARGS__); \ - if (brw->perf_debug) \ - _mesa_gl_debugf(&brw->ctx, &msg_id, \ - MESA_DEBUG_SOURCE_API, \ - MESA_DEBUG_TYPE_PERFORMANCE, \ - MESA_DEBUG_SEVERITY_MEDIUM, \ - __VA_ARGS__); \ -} while(0) - -#define WARN_ONCE(cond, fmt...) do { \ - if (unlikely(cond)) { \ - static bool _warned = false; \ - static GLuint msg_id = 0; \ - if (!_warned) { \ - fprintf(stderr, "WARNING: "); \ - fprintf(stderr, fmt); \ - _warned = true; \ - \ - _mesa_gl_debugf(ctx, &msg_id, \ - MESA_DEBUG_SOURCE_API, \ - MESA_DEBUG_TYPE_OTHER, \ - MESA_DEBUG_SEVERITY_HIGH, fmt); \ - } \ - } \ -} while (0) - -/* Considered adding a member to this struct to document which flags - * an update might raise so that ordering of the state atoms can be - * checked or derived at runtime. Dropped the idea in favor of having - * a debug mode where the state is monitored for flags which are - * raised that have already been tested against. - */ -struct brw_tracked_state { - struct brw_state_flags dirty; - void (*emit)( struct brw_context *brw ); -}; - -enum shader_time_shader_type { - ST_NONE, - ST_VS, - ST_TCS, - ST_TES, - ST_GS, - ST_FS8, - ST_FS16, - ST_FS32, - ST_CS, -}; - -struct brw_vertex_buffer { - /** Buffer object containing the uploaded vertex data */ - struct brw_bo *bo; - uint32_t offset; - uint32_t size; - /** Byte stride between elements in the uploaded array */ - GLuint stride; - GLuint step_rate; -}; -struct brw_vertex_element { - const struct gl_vertex_format *glformat; - - int buffer; - bool is_dual_slot; - /** Offset of the first element within the buffer object */ - unsigned int offset; -}; - -struct brw_query_object { - struct gl_query_object Base; - - /** Last query BO associated with this query. */ - struct brw_bo *bo; - - /** Last index in bo with query data for this object. */ - int last_index; - - /** True if we know the batch has been flushed since we ended the query. */ - bool flushed; -}; - -struct brw_reloc_list { - struct drm_i915_gem_relocation_entry *relocs; - int reloc_count; - int reloc_array_size; -}; - -struct brw_growing_bo { - struct brw_bo *bo; - uint32_t *map; - struct brw_bo *partial_bo; - uint32_t *partial_bo_map; - unsigned partial_bytes; - enum brw_memory_zone memzone; -}; - -struct brw_batch { - /** Current batchbuffer being queued up. */ - struct brw_growing_bo batch; - /** Current statebuffer being queued up. */ - struct brw_growing_bo state; - - /** Last batchbuffer submitted to the hardware. Used for glFinish(). 
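
(A minimal sketch of how struct brw_tracked_state above is used in practice: a state
atom pairs the dirty bits it depends on with an emit callback, and the state upload
loop invokes the callback only when one of those bits is raised. The names below are
illustrative, not from this file.)

    static void
    example_emit(struct brw_context *brw)
    {
       /* ... emit the hardware packets this atom owns ... */
    }

    static const struct brw_tracked_state example_atom = {
       .dirty = {
          .mesa = _NEW_COLOR,                           /* core Mesa flags */
          .brw = BRW_NEW_BATCH | BRW_NEW_FS_PROG_DATA,  /* driver flags */
       },
       .emit = example_emit,
    };
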
*/ - struct brw_bo *last_bo; - -#ifdef DEBUG - uint16_t emit, total; -#endif - uint32_t *map_next; - uint32_t state_used; - - bool use_shadow_copy; - bool use_batch_first; - bool needs_sol_reset; - bool state_base_address_emitted; - bool no_wrap; - bool contains_fence_signal; - - struct brw_reloc_list batch_relocs; - struct brw_reloc_list state_relocs; - unsigned int valid_reloc_flags; - - /** The validation list */ - struct drm_i915_gem_exec_object2 *validation_list; - struct brw_bo **exec_bos; - int exec_count; - int exec_array_size; - - /** The amount of aperture space (in bytes) used by all exec_bos */ - uint64_t aperture_space; - - struct { - uint32_t *map_next; - int batch_reloc_count; - int state_reloc_count; - int exec_count; - } saved; - - /** Map from batch offset to brw_state_batch data (with DEBUG_BATCH) */ - struct hash_table_u64 *state_batch_sizes; - - struct intel_batch_decode_ctx decoder; - - /** A list of drm_i915_exec_fences to have execbuf signal or wait on */ - struct util_dynarray exec_fences; -}; - -#define BRW_MAX_XFB_STREAMS 4 - -struct brw_transform_feedback_counter { - /** - * Index of the first entry of this counter within the primitive count BO. - * An entry is considered to be an N-tuple of 64bit values, where N is the - * number of vertex streams supported by the platform. - */ - unsigned bo_start; - - /** - * Index one past the last entry of this counter within the primitive - * count BO. - */ - unsigned bo_end; - - /** - * Primitive count values accumulated while this counter was active, - * excluding any entries buffered between \c bo_start and \c bo_end, which - * haven't been accounted for yet. - */ - uint64_t accum[BRW_MAX_XFB_STREAMS]; -}; - -static inline void -brw_reset_transform_feedback_counter( - struct brw_transform_feedback_counter *counter) -{ - counter->bo_start = counter->bo_end; - memset(&counter->accum, 0, sizeof(counter->accum)); -} - -struct brw_transform_feedback_object { - struct gl_transform_feedback_object base; - - /** A buffer to hold SO_WRITE_OFFSET(n) values while paused. */ - struct brw_bo *offset_bo; - - /** If true, SO_WRITE_OFFSET(n) should be reset to zero at next use. */ - bool zero_offsets; - - /** The most recent primitive mode (GL_TRIANGLES/GL_POINTS/GL_LINES). */ - GLenum primitive_mode; - - /** - * The maximum number of vertices that we can write without overflowing - * any of the buffers currently being used for transform feedback. - */ - unsigned max_index; - - struct brw_bo *prim_count_bo; - - /** - * Count of primitives generated during this transform feedback operation. - */ - struct brw_transform_feedback_counter counter; - - /** - * Count of primitives generated during the previous transform feedback - * operation. Used to implement DrawTransformFeedback(). - */ - struct brw_transform_feedback_counter previous_counter; - - /** - * Number of vertices written between last Begin/EndTransformFeedback(). - * - * Used to implement DrawTransformFeedback(). - */ - uint64_t vertices_written[BRW_MAX_XFB_STREAMS]; - bool vertices_written_valid; -}; - -/** - * Data shared between each programmable stage in the pipeline (vs, gs, and - * wm). - */ -struct brw_stage_state -{ - gl_shader_stage stage; - struct brw_stage_prog_data *prog_data; - - /** - * Optional scratch buffer used to store spilled register values and - * variably-indexed GRF arrays. - * - * The contents of this buffer are short-lived so the same memory can be - * re-used at will for multiple shader programs (executed by the same fixed - * function). 
However reusing a scratch BO for which shader invocations - * are still in flight with a per-thread scratch slot size other than the - * original can cause threads with different scratch slot size and FFTID - * (which may be executed in parallel depending on the shader stage and - * hardware generation) to map to an overlapping region of the scratch - * space, which can potentially lead to mutual scratch space corruption. - * For that reason if you borrow this scratch buffer you should only be - * using the slot size given by the \c per_thread_scratch member below, - * unless you're taking additional measures to synchronize thread execution - * across slot size changes. - */ - struct brw_bo *scratch_bo; - - /** - * Scratch slot size allocated for each thread in the buffer object given - * by \c scratch_bo. - */ - uint32_t per_thread_scratch; - - /** Offset in the program cache to the program */ - uint32_t prog_offset; - - /** Offset in the batchbuffer to Gfx4-5 pipelined state (VS/WM/GS_STATE). */ - uint32_t state_offset; - - struct brw_bo *push_const_bo; /* NULL if using the batchbuffer */ - uint32_t push_const_offset; /* Offset in the push constant BO or batch */ - int push_const_size; /* in 256-bit register increments */ - - /* Binding table: pointers to SURFACE_STATE entries. */ - uint32_t bind_bo_offset; - uint32_t surf_offset[BRW_MAX_SURFACES]; - - /** SAMPLER_STATE count and table offset */ - uint32_t sampler_count; - uint32_t sampler_offset; - - struct brw_image_param image_param[BRW_MAX_IMAGES]; - - /** Need to re-emit 3DSTATE_CONSTANT_XS? */ - bool push_constants_dirty; -}; - -enum brw_predicate_state { - /* The first two states are used if we can determine whether to draw - * without having to look at the values in the query object buffer. This - * will happen if there is no conditional render in progress, if the query - * object is already completed or if something else has already added - * samples to the preliminary result such as via a BLT command. - */ - BRW_PREDICATE_STATE_RENDER, - BRW_PREDICATE_STATE_DONT_RENDER, - /* In this case whether to draw or not depends on the result of an - * MI_PREDICATE command so the predicate enable bit needs to be checked. - */ - BRW_PREDICATE_STATE_USE_BIT, - /* In this case, either MI_PREDICATE doesn't exist or we lack the - * necessary kernel features to use it. Stall for the query result. - */ - BRW_PREDICATE_STATE_STALL_FOR_QUERY, -}; - -struct shader_times; - -struct intel_l3_config; -struct intel_perf; - -struct brw_uploader { - struct brw_bufmgr *bufmgr; - struct brw_bo *bo; - void *map; - uint32_t next_offset; - unsigned default_size; -}; - -/** - * brw_context is derived from gl_context. - */ -struct brw_context -{ - struct gl_context ctx; /**< base class, must be first field */ - - struct - { - /** - * Emit an MI_REPORT_PERF_COUNT command packet. - * - * This asks the GPU to write a report of the current OA counter values - * into @bo at the given offset and containing the given @report_id - * which we can cross-reference when parsing the report (gfx7+ only). 
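
(The scratch-space rules described a few lines up imply a grow-only discipline: never
shrink per_thread_scratch while older invocations may still be in flight. A sketch of
the call a stage's state upload might make, using the brw_alloc_stage_scratch() helper
declared later in this header; the surrounding condition is illustrative, and the
helper is expected to early-out when the current allocation already suffices.)

    if (prog_data->total_scratch > stage_state->per_thread_scratch)
       brw_alloc_stage_scratch(brw, stage_state, prog_data->total_scratch);
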
- */ - void (*emit_mi_report_perf_count)(struct brw_context *brw, - struct brw_bo *bo, - uint32_t offset_in_bytes, - uint32_t report_id); - - void (*emit_compute_walker)(struct brw_context *brw); - void (*emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); - void (*emit_state_base_address)(struct brw_context *brw); - } vtbl; - - struct brw_bufmgr *bufmgr; - - uint32_t hw_ctx; - - /** - * BO for post-sync nonzero writes for gfx6 workaround. - * - * This buffer also contains a marker + description of the driver. This - * buffer is added to all execbufs syscalls so that we can identify the - * driver that generated a hang by looking at the content of the buffer in - * the error state. - * - * Read/write should go at workaround_bo_offset in that buffer to avoid - * overriding the debug data. - */ - struct brw_bo *workaround_bo; - uint32_t workaround_bo_offset; - uint8_t pipe_controls_since_last_cs_stall; - - /** - * Set of struct brw_bo * that have been rendered to within this batchbuffer - * and would need flushing before being used from another cache domain that - * isn't coherent with it (i.e. the sampler). - */ - struct hash_table *render_cache; - - /** - * Set of struct brw_bo * that have been used as a depth buffer within this - * batchbuffer and would need flushing before being used from another cache - * domain that isn't coherent with it (i.e. the sampler). - */ - struct set *depth_cache; - - /** - * Number of resets observed in the system at context creation. - * - * This is tracked in the context so that we can determine that another - * reset has occurred. - */ - uint32_t reset_count; - - struct brw_batch batch; - - struct brw_uploader upload; - - /** - * Set if rendering has occurred to the drawable's front buffer. - * - * This is used in the DRI2 case to detect that glFlush should also copy - * the contents of the fake front buffer to the real front buffer. - */ - bool front_buffer_dirty; - - /** - * True if the __DRIdrawable's current __DRIimageBufferMask is - * __DRI_IMAGE_BUFFER_SHARED. - */ - bool is_shared_buffer_bound; - - /** - * True if a shared buffer is bound and it has received any rendering since - * the previous __DRImutableRenderBufferLoaderExtension::displaySharedBuffer(). - */ - bool is_shared_buffer_dirty; - - /** Framerate throttling: @{ */ - struct brw_bo *throttle_batch[2]; - - /* Limit the number of outstanding SwapBuffers by waiting for an earlier - * frame of rendering to complete. This gives a very precise cap to the - * latency between input and output such that rendering never gets more - * than a frame behind the user. (With the caveat that we technically are - * not using the SwapBuffers itself as a barrier but the first batch - * submitted afterwards, which may be immediately prior to the next - * SwapBuffers.) - */ - bool need_swap_throttle; - - /** General throttling, not caught by throttling between SwapBuffers */ - bool need_flush_throttle; - /** @} */ - - GLuint stats_wm; - - /** - * drirc options: - * @{ - */ - bool always_flush_batch; - bool always_flush_cache; - bool disable_throttling; - bool precompile; - bool dual_color_blend_by_location; - /** @} */ - - GLuint primitive; /**< Hardware primitive, such as _3DPRIM_TRILIST. */ - - bool object_preemption; /**< Object level preemption enabled. 
 */
-
-   GLenum reduced_primitive;
-
-   /**
-    * Set if we're either a debug context or the INTEL_DEBUG=perf environment
-    * variable is set; this is the flag indicating to do expensive work that
-    * might lead to a perf_debug() call.
-    */
-   bool perf_debug;
-
-   uint64_t max_gtt_map_object_size;
-
-   bool has_hiz;
-   bool has_separate_stencil;
-
-   bool can_push_ubos;
-
-   /** Derived stencil states. */
-   bool stencil_enabled;
-   bool stencil_two_sided;
-   bool stencil_write_enabled;
-   /** Derived polygon state. */
-   bool polygon_front_bit; /**< 0=GL_CCW, 1=GL_CW */
-
-   struct isl_device isl_dev;
-
-   struct blorp_context blorp;
-
-   GLuint NewGLState;
-   struct {
-      struct brw_state_flags pipelines[BRW_NUM_PIPELINES];
-   } state;
-
-   enum brw_pipeline last_pipeline;
-
-   struct brw_cache cache;
-
-   /* Whether a meta-operation is in progress. */
-   bool meta_in_progress;
-
-   /* Whether the last depth/stencil packets were both NULL. */
-   bool no_depth_or_stencil;
-
-   /* The last PMA stall bits programmed. */
-   uint32_t pma_stall_bits;
-
-   /* Whether INTEL_black_render is active. */
-   bool frontend_noop;
-
-   struct {
-      struct {
-         /**
-          * Either the value of gl_BaseVertex for indexed draw calls or the
-          * value of the argument for non-indexed draw calls for the
-          * current _mesa_prim.
-          */
-         int firstvertex;
-
-         /** The value of gl_BaseInstance for the current _mesa_prim. */
-         int gl_baseinstance;
-      } params;
-
-      /**
-       * Buffer and offset used for GL_ARB_shader_draw_parameters which will
-       * point to the indirect buffer for indirect draw calls.
-       */
-      struct brw_bo *draw_params_bo;
-      uint32_t draw_params_offset;
-
-      struct {
-         /**
-          * The value of gl_DrawID for the current _mesa_prim. This always comes
-          * in from its own vertex buffer since it's not part of the indirect
-          * draw parameters.
-          */
-         int gl_drawid;
-
-         /**
-          * Stores if the current _mesa_prim is an indexed or non-indexed draw
-          * (~0/0). Useful to calculate gl_BaseVertex as an AND of firstvertex
-          * and is_indexed_draw.
-          */
-         int is_indexed_draw;
-      } derived_params;
-
-      /**
-       * Buffer and offset used for GL_ARB_shader_draw_parameters which contains
-       * parameters that are not present in the indirect buffer. They will go in
-       * their own vertex element.
-       */
-      struct brw_bo *derived_draw_params_bo;
-      uint32_t derived_draw_params_offset;
-
-      /**
-       * Pointer to the buffer storing the indirect draw parameters. It
-       * currently only stores the number of requested draw calls but more
-       * parameters could potentially be added.
-       */
-      struct brw_bo *draw_params_count_bo;
-      uint32_t draw_params_count_offset;
-
-      /**
-       * Draw indirect buffer.
-       */
-      unsigned draw_indirect_stride;
-      GLsizeiptr draw_indirect_offset;
-      struct gl_buffer_object *draw_indirect_data;
-   } draw;
-
-   struct {
-      /**
-       * For gl_NumWorkGroups: If num_work_groups_bo is non-NULL, then it is
-       * an indirect call, and num_work_groups_offset is valid. Otherwise,
-       * num_work_groups is set based on glDispatchCompute.
-       */
-      struct brw_bo *num_work_groups_bo;
-      GLintptr num_work_groups_offset;
-      const GLuint *num_work_groups;
-      /**
-       * This is only used alongside ARB_compute_variable_group_size when the
-       * local work group size is variable, otherwise it's NULL.
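
(The (~0/0) encoding of is_indexed_draw in the derived_params block above exists so
gl_BaseVertex can be derived without a branch. A worked sketch of the arithmetic, not
code from this file:)

    /* indexed draw:     is_indexed_draw == ~0, the AND keeps firstvertex  */
    /* non-indexed draw: is_indexed_draw ==  0, gl_BaseVertex becomes 0    */
    int gl_basevertex = firstvertex & is_indexed_draw;
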
-       */
-      const GLuint *group_size;
-   } compute;
-
-   struct {
-      struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
-      struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
-
-      struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
-      GLuint nr_enabled;
-      GLuint nr_buffers;
-
-      /* Summary of size and varying of active arrays, so we can check
-       * for changes to this state:
-       */
-      bool index_bounds_valid;
-      unsigned int min_index, max_index;
-
-      /* Offset from start of vertex buffer so we can avoid redefining
-       * the same VB packed over and over again.
-       */
-      unsigned int start_vertex_bias;
-
-      /**
-       * Certain vertex attribute formats aren't natively handled by the
-       * hardware and require special VS code to fix up their values.
-       *
-       * These bitfields indicate which workarounds are needed.
-       */
-      uint8_t attrib_wa_flags[VERT_ATTRIB_MAX];
-
-      /* High bits of the last seen vertex buffer address (for workarounds). */
-      uint16_t last_bo_high_bits[33];
-   } vb;
-
-   struct {
-      /**
-       * Index buffer for this draw_prims call.
-       *
-       * Updates are signaled by BRW_NEW_INDICES.
-       */
-      const struct _mesa_index_buffer *ib;
-
-      /* Updates are signaled by BRW_NEW_INDEX_BUFFER. */
-      struct brw_bo *bo;
-      uint32_t size;
-      unsigned index_size;
-
-      /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
-       * avoid re-uploading the IB packet over and over if we're actually
-       * referencing the same index buffer.
-       */
-      unsigned int start_vertex_offset;
-
-      /* High bits of the last seen index buffer address (for workarounds). */
-      uint16_t last_bo_high_bits;
-
-      /* Used to understand if the GPU state of primitive restart is up to date */
-      bool enable_cut_index;
-   } ib;
-
-   /* Active vertex program:
-    */
-   struct gl_program *programs[MESA_SHADER_STAGES];
-
-   /**
-    * Number of samples in ctx->DrawBuffer, updated by BRW_NEW_NUM_SAMPLES so
-    * that we don't have to reemit that state every time we change FBOs.
-    */
-   unsigned int num_samples;
-
-   /* BRW_NEW_URB_ALLOCATIONS:
-    */
-   struct {
-      GLuint vsize;   /* vertex size plus header in urb registers */
-      GLuint gsize;   /* GS output size in urb registers */
-      GLuint hsize;   /* Tessellation control output size in urb registers */
-      GLuint dsize;   /* Tessellation evaluation output size in urb registers */
-      GLuint csize;   /* constant buffer size in urb registers */
-      GLuint sfsize;  /* setup data size in urb registers */
-
-      bool constrained;
-
-      GLuint nr_vs_entries;
-      GLuint nr_hs_entries;
-      GLuint nr_ds_entries;
-      GLuint nr_gs_entries;
-      GLuint nr_clip_entries;
-      GLuint nr_sf_entries;
-      GLuint nr_cs_entries;
-
-      GLuint vs_start;
-      GLuint hs_start;
-      GLuint ds_start;
-      GLuint gs_start;
-      GLuint clip_start;
-      GLuint sf_start;
-      GLuint cs_start;
-      /**
-       * URB size in the current configuration. The units this is expressed
-       * in are somewhat inconsistent, see intel_device_info::urb::size.
-       *
-       * FINISHME: Represent the URB size consistently in KB on all platforms.
-       */
-      GLuint size;
-
-      /* True if the most recently sent _3DSTATE_URB message allocated
-       * URB space for the GS.
-       */
-      bool gs_present;
-
-      /* True if the most recently sent _3DSTATE_URB message allocated
-       * URB space for the HS and DS.
-       */
-      bool tess_present;
-   } urb;
-
-
-   /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
-   struct {
-      GLuint wm_start;  /**< pos of first wm const in CURBE buffer */
-      GLuint wm_size;   /**< number of float[4] consts, multiple of 16 */
-      GLuint clip_start;
-      GLuint clip_size;
-      GLuint vs_start;
-      GLuint vs_size;
-      GLuint total_size;
-
-      /**
-       * Pointer to the (intel_upload.c-generated) BO containing the uniforms
-       * for upload to the CURBE.
-       */
-      struct brw_bo *curbe_bo;
-      /** Offset within curbe_bo of space for current curbe entry */
-      GLuint curbe_offset;
-   } curbe;
-
-   /**
-    * Layout of vertex data exiting the geometry portion of the pipeline.
-    * This comes from the last enabled shader stage (GS, DS, or VS).
-    *
-    * BRW_NEW_VUE_MAP_GEOM_OUT is flagged when the VUE map changes.
-    */
-   struct brw_vue_map vue_map_geom_out;
-
-   struct {
-      struct brw_stage_state base;
-   } vs;
-
-   struct {
-      struct brw_stage_state base;
-   } tcs;
-
-   struct {
-      struct brw_stage_state base;
-   } tes;
-
-   struct {
-      struct brw_stage_state base;
-
-      /**
-       * True if the 3DSTATE_GS command most recently emitted to the 3D
-       * pipeline enabled the GS; false otherwise.
-       */
-      bool enabled;
-   } gs;
-
-   struct {
-      struct brw_ff_gs_prog_data *prog_data;
-
-      bool prog_active;
-      /** Offset in the program cache to the FF_GS program pre-gfx6 */
-      uint32_t prog_offset;
-      uint32_t state_offset;
-
-      uint32_t bind_bo_offset;
-      /**
-       * Surface offsets for the binding table. We only need surfaces to
-       * implement transform feedback so BRW_MAX_SOL_BINDINGS is all that we
-       * need in this case.
-       */
-      uint32_t surf_offset[BRW_MAX_SOL_BINDINGS];
-   } ff_gs;
-
-   struct {
-      struct brw_clip_prog_data *prog_data;
-
-      /** Offset in the program cache to the CLIP program pre-gfx6 */
-      uint32_t prog_offset;
-
-      /* Offset in the batch to the CLIP state on pre-gfx6. */
-      uint32_t state_offset;
-
-      /* As of gfx6, this is the offset in the batch to the CLIP VP,
-       * instead of vp_bo.
-       */
-      uint32_t vp_offset;
-
-      /**
-       * The number of viewports to use. If gl_ViewportIndex is written,
-       * we can have up to ctx->Const.MaxViewports viewports. If not,
-       * the viewport index is always 0, so we can only emit one.
-       */
-      uint8_t viewport_count;
-   } clip;
-
-
-   struct {
-      struct brw_sf_prog_data *prog_data;
-
-      /** Offset in the program cache to the SF program pre-gfx6 */
-      uint32_t prog_offset;
-      uint32_t state_offset;
-      uint32_t vp_offset;
-   } sf;
-
-   struct {
-      struct brw_stage_state base;
-
-      /**
-       * Buffer object used in place of multisampled null render targets on
-       * Gfx6. See brw_emit_null_surface_state().
-       */
-      struct brw_bo *multisampled_null_render_target_bo;
-
-      float offset_clamp;
-   } wm;
-
-   struct {
-      struct brw_stage_state base;
-   } cs;
-
-   struct {
-      uint32_t state_offset;
-      uint32_t blend_state_offset;
-      uint32_t depth_stencil_state_offset;
-      uint32_t vp_offset;
-   } cc;
-
-   struct {
-      struct brw_query_object *obj;
-      bool begin_emitted;
-   } query;
-
-   struct {
-      enum brw_predicate_state state;
-      bool supported;
-   } predicate;
-
-   struct intel_perf_context *perf_ctx;
-
-   int num_atoms[BRW_NUM_PIPELINES];
-   const struct brw_tracked_state render_atoms[76];
-   const struct brw_tracked_state compute_atoms[11];
-
-   const enum isl_format *mesa_to_isl_render_format;
-   const bool *mesa_format_supports_render;
-
-   /* PrimitiveRestart */
-   struct {
-      bool in_progress;
-      bool enable_cut_index;
-      unsigned restart_index;
-   } prim_restart;
-
-   /** Computed depth/stencil/hiz state from the current attached
-    * renderbuffers, valid only during the drawing state upload loop after
-    * brw_workaround_depthstencil_alignment().
-    */
-   struct {
-      /* Inter-tile (page-aligned) byte offsets. */
-      uint32_t depth_offset;
-      /* Intra-tile x,y offsets for drawing to combined depth-stencil. Only
-       * used for Gen < 6.
-       */
-      uint32_t tile_x, tile_y;
-   } depthstencil;
-
-   uint32_t num_instances;
-   int basevertex;
-   int baseinstance;
-
-   struct {
-      const struct intel_l3_config *config;
-   } l3;
-
-   struct {
-      struct brw_bo *bo;
-      const char **names;
-      int *ids;
-      enum shader_time_shader_type *types;
-      struct shader_times *cumulative;
-      int num_entries;
-      int max_entries;
-      double report_time;
-   } shader_time;
-
-   struct brw_fast_clear_state *fast_clear_state;
-
-   /* Array of aux usages to use for drawing. Aux usage for render targets is
-    * a bit more complex than simply calling a single function so we need some
-    * way of passing it from brw_draw.c to surface state setup.
-    */
-   enum isl_aux_usage draw_aux_usage[MAX_DRAW_BUFFERS];
-
-   enum gfx9_astc5x5_wa_tex_type gfx9_astc5x5_wa_tex_mask;
-
-   /** Last rendering scale argument provided to brw_emit_hashing_mode().
*/ - unsigned current_hash_scale; - - __DRIcontext *driContext; - struct brw_screen *screen; - void *mem_ctx; -}; - -/* brw_clear.c */ -extern void brw_init_clear_functions(struct dd_function_table *functions); - -/*====================================================================== - * brw_context.c - */ -extern const char *const brw_vendor_string; - -extern const char * -brw_get_renderer_string(const struct brw_screen *screen); - -enum { - DRI_CONF_BO_REUSE_DISABLED, - DRI_CONF_BO_REUSE_ALL -}; - -void brw_update_renderbuffers(__DRIcontext *context, - __DRIdrawable *drawable); -void brw_prepare_render(struct brw_context *brw); - -void gfx9_apply_single_tex_astc5x5_wa(struct brw_context *brw, - mesa_format format, - enum isl_aux_usage aux_usage); - -void brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering, - bool *draw_aux_buffer_disabled); - -void brw_resolve_for_dri2_flush(struct brw_context *brw, - __DRIdrawable *drawable); - -GLboolean brw_create_context(gl_api api, - const struct gl_config *mesaVis, - __DRIcontext *driContextPriv, - const struct __DriverContextConfig *ctx_config, - unsigned *error, - void *sharedContextPrivate); - -/*====================================================================== - * brw_misc_state.c - */ -void brw_workaround_depthstencil_alignment(struct brw_context *brw, - GLbitfield clear_mask); -void brw_emit_hashing_mode(struct brw_context *brw, unsigned width, - unsigned height, unsigned scale); - -/* brw_object_purgeable.c */ -void brw_init_object_purgeable_functions(struct dd_function_table *functions); - -/*====================================================================== - * brw_queryobj.c - */ -void brw_init_common_queryobj_functions(struct dd_function_table *functions); -void gfx4_init_queryobj_functions(struct dd_function_table *functions); -void brw_emit_query_begin(struct brw_context *brw); -void brw_emit_query_end(struct brw_context *brw); -void brw_query_counter(struct gl_context *ctx, struct gl_query_object *q); -bool brw_is_query_pipelined(struct brw_query_object *query); -uint64_t brw_raw_timestamp_delta(struct brw_context *brw, - uint64_t time0, uint64_t time1); - -/** gfx6_queryobj.c */ -void gfx6_init_queryobj_functions(struct dd_function_table *functions); -void brw_write_timestamp(struct brw_context *brw, struct brw_bo *bo, int idx); -void brw_write_depth_count(struct brw_context *brw, struct brw_bo *bo, int idx); - -/** hsw_queryobj.c */ -void hsw_overflow_result_to_gpr0(struct brw_context *brw, - struct brw_query_object *query, - int count); -void hsw_init_queryobj_functions(struct dd_function_table *functions); - -/** brw_conditional_render.c */ -void brw_init_conditional_render_functions(struct dd_function_table *functions); -bool brw_check_conditional_render(struct brw_context *brw); - -/** brw_batch.c */ -void brw_load_register_mem(struct brw_context *brw, - uint32_t reg, - struct brw_bo *bo, - uint32_t offset); -void brw_load_register_mem64(struct brw_context *brw, - uint32_t reg, - struct brw_bo *bo, - uint32_t offset); -void brw_store_register_mem32(struct brw_context *brw, - struct brw_bo *bo, uint32_t reg, uint32_t offset); -void brw_store_register_mem64(struct brw_context *brw, - struct brw_bo *bo, uint32_t reg, uint32_t offset); -void brw_load_register_imm32(struct brw_context *brw, - uint32_t reg, uint32_t imm); -void brw_load_register_imm64(struct brw_context *brw, - uint32_t reg, uint64_t imm); -void brw_load_register_reg(struct brw_context *brw, uint32_t dst, - uint32_t src); -void 
brw_load_register_reg64(struct brw_context *brw, uint32_t dst, - uint32_t src); -void brw_store_data_imm32(struct brw_context *brw, struct brw_bo *bo, - uint32_t offset, uint32_t imm); -void brw_store_data_imm64(struct brw_context *brw, struct brw_bo *bo, - uint32_t offset, uint64_t imm); - -/*====================================================================== - * intel_tex_validate.c - */ -void brw_validate_textures( struct brw_context *brw ); - - -/*====================================================================== - * brw_program.c - */ -void brw_init_frag_prog_functions(struct dd_function_table *functions); - -void brw_get_scratch_bo(struct brw_context *brw, - struct brw_bo **scratch_bo, int size); -void brw_alloc_stage_scratch(struct brw_context *brw, - struct brw_stage_state *stage_state, - unsigned per_thread_size); -void brw_init_shader_time(struct brw_context *brw); -int brw_get_shader_time_index(struct brw_context *brw, - struct gl_program *prog, - enum shader_time_shader_type type, - bool is_glsl_sh); -void brw_collect_and_report_shader_time(struct brw_context *brw); -void brw_destroy_shader_time(struct brw_context *brw); - -/* brw_urb.c - */ -void brw_calculate_urb_fence(struct brw_context *brw, unsigned csize, - unsigned vsize, unsigned sfsize); -void brw_upload_urb_fence(struct brw_context *brw); - -/* brw_curbe.c - */ -void brw_upload_cs_urb_state(struct brw_context *brw); - -/* brw_vs.c */ -gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx); - -/* brw_draw_upload.c */ -unsigned brw_get_vertex_surface_type(struct brw_context *brw, - const struct gl_vertex_format *glformat); - -static inline unsigned -brw_get_index_type(unsigned index_size) -{ - /* The hw needs 0x00, 0x01, and 0x02 for ubyte, ushort, and uint, - * respectively. 
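- *
- * A quick sanity check of the shift below (illustrative values, not part
- * of the original comment): index_size is the index size in bytes, so
- * 1 >> 1 == 0x00 (ubyte), 2 >> 1 == 0x01 (ushort) and 4 >> 1 == 0x02
- * (uint).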
- */ - return index_size >> 1; -} - -void brw_prepare_vertices(struct brw_context *brw); - -/* brw_wm_surface_state.c */ -void brw_update_buffer_texture_surface(struct gl_context *ctx, - unsigned unit, - uint32_t *surf_offset); -void -brw_update_sol_surface(struct brw_context *brw, - struct gl_buffer_object *buffer_obj, - uint32_t *out_offset, unsigned num_vector_components, - unsigned stride_dwords, unsigned offset_dwords); -void brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog, - struct brw_stage_state *stage_state, - struct brw_stage_prog_data *prog_data); -void brw_upload_image_surfaces(struct brw_context *brw, - const struct gl_program *prog, - struct brw_stage_state *stage_state, - struct brw_stage_prog_data *prog_data); - -/* brw_surface_formats.c */ -void brw_screen_init_surface_formats(struct brw_screen *screen); -void brw_init_surface_formats(struct brw_context *brw); -bool brw_render_target_supported(struct brw_context *brw, - struct gl_renderbuffer *rb); -uint32_t brw_depth_format(struct brw_context *brw, mesa_format format); - -/* brw_performance_query.c */ -void brw_init_performance_queries(struct brw_context *brw); - -/* intel_extensions.c */ -extern void brw_init_extensions(struct gl_context *ctx); - -/* intel_state.c */ -extern int brw_translate_shadow_compare_func(GLenum func); -extern int brw_translate_compare_func(GLenum func); -extern int brw_translate_stencil_op(GLenum op); - -/* brw_sync.c */ -void brw_init_syncobj_functions(struct dd_function_table *functions); - -/* gfx6_sol.c */ -struct gl_transform_feedback_object * -brw_new_transform_feedback(struct gl_context *ctx, GLuint name); -void -brw_delete_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); -void -brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, - struct gl_transform_feedback_object *obj); -void -brw_end_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); -void -brw_pause_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); -void -brw_resume_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); -void -brw_save_primitives_written_counters(struct brw_context *brw, - struct brw_transform_feedback_object *obj); -GLsizei -brw_get_transform_feedback_vertex_count(struct gl_context *ctx, - struct gl_transform_feedback_object *obj, - GLuint stream); - -/* gfx7_sol_state.c */ -void -gfx7_begin_transform_feedback(struct gl_context *ctx, GLenum mode, - struct gl_transform_feedback_object *obj); -void -gfx7_end_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); -void -gfx7_pause_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); -void -gfx7_resume_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); - -/* hsw_sol.c */ -void -hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, - struct gl_transform_feedback_object *obj); -void -hsw_end_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); -void -hsw_pause_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); -void -hsw_resume_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj); - -/* brw_blorp_blit.cpp */ -GLbitfield -brw_blorp_framebuffer(struct brw_context *brw, - struct gl_framebuffer *readFb, - struct gl_framebuffer *drawFb, - GLint srcX0, GLint srcY0, GLint 
srcX1, GLint srcY1,
- GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
- GLbitfield mask, GLenum filter);
-
-bool
-brw_blorp_copytexsubimage(struct brw_context *brw,
- struct gl_renderbuffer *src_rb,
- struct gl_texture_image *dst_image,
- int slice,
- int srcX0, int srcY0,
- int dstX0, int dstY0,
- int width, int height);
-
-/* brw_generate_mipmap.c */
-void brw_generate_mipmap(struct gl_context *ctx, GLenum target,
- struct gl_texture_object *tex_obj);
-
-void
-gfx6_get_sample_position(struct gl_context *ctx,
- struct gl_framebuffer *fb,
- GLuint index,
- GLfloat *result);
-
-/* gfx8_multisample_state.c */
-void gfx8_emit_3dstate_sample_pattern(struct brw_context *brw);
-
-/* gfx7_l3_state.c */
-void brw_emit_l3_state(struct brw_context *brw);
-
-/* gfx7_urb.c */
-void
-gfx7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
- unsigned hs_size, unsigned ds_size,
- unsigned gs_size, unsigned fs_size);
-
-void
-gfx6_upload_urb(struct brw_context *brw, unsigned vs_size,
- bool gs_present, unsigned gs_size);
-void
-gfx7_upload_urb(struct brw_context *brw, unsigned vs_size,
- bool gs_present, bool tess_present);
-
-/* brw_reset.c */
-extern GLenum
-brw_get_graphics_reset_status(struct gl_context *ctx);
-void
-brw_check_for_reset(struct brw_context *brw);
-
-/* brw_compute.c */
-extern void
-brw_init_compute_functions(struct dd_function_table *functions);
-
-/* brw_program_binary.c */
-extern void
-brw_program_binary_init(unsigned device_id);
-extern void
-brw_get_program_binary_driver_sha1(struct gl_context *ctx, uint8_t *sha1);
-void brw_serialize_program_binary(struct gl_context *ctx,
- struct gl_shader_program *sh_prog,
- struct gl_program *prog);
-extern void
-brw_deserialize_program_binary(struct gl_context *ctx,
- struct gl_shader_program *shProg,
- struct gl_program *prog);
-void
-brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog);
-void
-brw_program_deserialize_driver_blob(struct gl_context *ctx,
- struct gl_program *prog,
- gl_shader_stage stage);
-
-/*======================================================================
- * Inline conversion functions. These are better-typed than the
- * macros used previously:
- */
-static inline struct brw_context *
-brw_context( struct gl_context *ctx )
-{
- return (struct brw_context *)ctx;
-}
-
-static inline struct brw_program *
-brw_program(struct gl_program *p)
-{
- return (struct brw_program *) p;
-}
-
-static inline const struct brw_program *
-brw_program_const(const struct gl_program *p)
-{
- return (const struct brw_program *) p;
-}
-
-static inline bool
-brw_depth_writes_enabled(const struct brw_context *brw)
-{
- const struct gl_context *ctx = &brw->ctx;
-
- /* We consider depth writes disabled if the depth function is GL_EQUAL,
- * because it would just overwrite the existing depth value with itself.
- *
- * These bonus depth writes not only use bandwidth, but they also can
- * prevent early depth processing. For example, if the pixel shader
- * discards, the hardware must invoke the shader to determine whether or
- * not to do the depth write. If writes are disabled, we may still be able
- * to do the depth test before the shader, and skip the shader execution.
- *
- * The Broadwell 3DSTATE_WM_DEPTH_STENCIL documentation also contains
- * a programming note saying to disable depth writes for EQUAL. 
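- *
- * (Illustrative note, not from the original comment: with
- * glDepthFunc(GL_EQUAL) and glDepthMask(GL_TRUE) this helper still
- * reports writes as disabled, since any write could only store the value
- * already present in the depth buffer.)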
- */
- return ctx->Depth.Test && ctx->Depth.Mask && ctx->Depth.Func != GL_EQUAL;
-}
-
-void
-brw_emit_depthbuffer(struct brw_context *brw);
-
-uint32_t get_hw_prim_for_gl_prim(int mode);
-
-void
-gfx6_upload_push_constants(struct brw_context *brw,
- const struct gl_program *prog,
- const struct brw_stage_prog_data *prog_data,
- struct brw_stage_state *stage_state);
-
-bool
-gfx9_use_linear_1d_layout(const struct brw_context *brw,
- const struct brw_mipmap_tree *mt);
-
-/* brw_queryformat.c */
-void brw_query_internal_format(struct gl_context *ctx, GLenum target,
- GLenum internalFormat, GLenum pname,
- GLint *params);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/mesa/drivers/dri/i965/brw_copy_image.c b/src/mesa/drivers/dri/i965/brw_copy_image.c
deleted file mode 100644
index 5b9f49a..0000000
--- a/src/mesa/drivers/dri/i965/brw_copy_image.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2014 Intel Corporation All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jason Ekstrand
- */
-
-#include "brw_blorp.h"
-#include "brw_fbo.h"
-#include "brw_tex.h"
-#include "brw_blit.h"
-#include "brw_mipmap_tree.h"
-#include "main/formats.h"
-#include "main/teximage.h"
-#include "drivers/common/meta.h"
-
-static void
-copy_miptrees(struct brw_context *brw,
- struct brw_mipmap_tree *src_mt,
- int src_x, int src_y, int src_z, unsigned src_level,
- struct brw_mipmap_tree *dst_mt,
- int dst_x, int dst_y, int dst_z, unsigned dst_level,
- int src_width, int src_height)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
- if (devinfo->ver <= 5) {
- /* On gfx4-5, try BLT first.
- *
- * Gfx4-5 have a single ring for both 3D and BLT operations, so there are
- * no inter-ring synchronization issues like on Gfx6+. It is apparently
- * faster than using the 3D pipeline. Original Gfx4 also has to rebase
- * and copy miptree slices in order to render to unaligned locations. 
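- *
- * (If brw_miptree_copy() below declines the blit and returns false, we
- * simply fall through to the BLORP path.)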
- */ - if (brw_miptree_copy(brw, src_mt, src_level, src_z, src_x, src_y, - dst_mt, dst_level, dst_z, dst_x, dst_y, - src_width, src_height)) - return; - } - - brw_blorp_copy_miptrees(brw, - src_mt, src_level, src_z, - dst_mt, dst_level, dst_z, - src_x, src_y, dst_x, dst_y, - src_width, src_height); -} - -static void -brw_copy_image_sub_data(struct gl_context *ctx, - struct gl_texture_image *src_image, - struct gl_renderbuffer *src_renderbuffer, - int src_x, int src_y, int src_z, - struct gl_texture_image *dst_image, - struct gl_renderbuffer *dst_renderbuffer, - int dst_x, int dst_y, int dst_z, - int src_width, int src_height) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_mipmap_tree *src_mt, *dst_mt; - unsigned src_level, dst_level; - - if (src_image) { - src_mt = brw_texture_image(src_image)->mt; - src_level = src_image->Level + src_image->TexObject->Attrib.MinLevel; - - /* Cube maps actually have different images per face */ - if (src_image->TexObject->Target == GL_TEXTURE_CUBE_MAP) - src_z = src_image->Face; - - src_z += src_image->TexObject->Attrib.MinLayer; - } else { - assert(src_renderbuffer); - src_mt = brw_renderbuffer(src_renderbuffer)->mt; - src_image = src_renderbuffer->TexImage; - src_level = 0; - } - - if (dst_image) { - dst_mt = brw_texture_image(dst_image)->mt; - - dst_level = dst_image->Level + dst_image->TexObject->Attrib.MinLevel; - - /* Cube maps actually have different images per face */ - if (dst_image->TexObject->Target == GL_TEXTURE_CUBE_MAP) - dst_z = dst_image->Face; - - dst_z += dst_image->TexObject->Attrib.MinLayer; - } else { - assert(dst_renderbuffer); - dst_mt = brw_renderbuffer(dst_renderbuffer)->mt; - dst_image = dst_renderbuffer->TexImage; - dst_level = 0; - } - - copy_miptrees(brw, src_mt, src_x, src_y, src_z, src_level, - dst_mt, dst_x, dst_y, dst_z, dst_level, - src_width, src_height); - - /* CopyImage only works for equal formats, texture view equivalence - * classes, and a couple special cases for compressed textures. - * - * Notably, GL_DEPTH_STENCIL does not appear in any equivalence - * classes, so we know the formats must be the same, and thus both - * will either have stencil, or not. They can't be mismatched. - */ - assert((src_mt->stencil_mt != NULL) == (dst_mt->stencil_mt != NULL)); - - if (dst_mt->stencil_mt) { - copy_miptrees(brw, src_mt->stencil_mt, src_x, src_y, src_z, src_level, - dst_mt->stencil_mt, dst_x, dst_y, dst_z, dst_level, - src_width, src_height); - } -} - -void -brw_init_copy_image_functions(struct dd_function_table *functions) -{ - functions->CopyImageSubData = brw_copy_image_sub_data; -} diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c deleted file mode 100644 index 786dda4..0000000 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright (c) 2014 - 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "util/ralloc.h" -#include "brw_context.h" -#include "brw_cs.h" -#include "brw_wm.h" -#include "brw_mipmap_tree.h" -#include "brw_state.h" -#include "brw_batch.h" -#include "compiler/brw_nir.h" -#include "brw_program.h" -#include "compiler/glsl/ir_uniform.h" - -static void -assign_cs_binding_table_offsets(const struct intel_device_info *devinfo, - const struct gl_program *prog, - struct brw_cs_prog_data *prog_data) -{ - uint32_t next_binding_table_offset = 0; - - /* May not be used if the gl_NumWorkGroups variable is not accessed. */ - prog_data->binding_table.work_groups_start = next_binding_table_offset; - next_binding_table_offset++; - - brw_assign_common_binding_table_offsets(devinfo, prog, &prog_data->base, - next_binding_table_offset); -} - -static bool -brw_codegen_cs_prog(struct brw_context *brw, - struct brw_program *cp, - struct brw_cs_prog_key *key) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const GLuint *program; - void *mem_ctx = ralloc_context(NULL); - struct brw_cs_prog_data prog_data; - bool start_busy = false; - double start_time = 0; - nir_shader *nir = nir_shader_clone(mem_ctx, cp->program.nir); - - memset(&prog_data, 0, sizeof(prog_data)); - - if (cp->program.info.shared_size > 64 * 1024) { - cp->program.sh.data->LinkStatus = LINKING_FAILURE; - const char *error_str = - "Compute shader used more than 64KB of shared variables"; - ralloc_strcat(&cp->program.sh.data->InfoLog, error_str); - _mesa_problem(NULL, "Failed to link compute shader: %s\n", error_str); - - ralloc_free(mem_ctx); - return false; - } - - assign_cs_binding_table_offsets(devinfo, &cp->program, &prog_data); - - brw_nir_setup_glsl_uniforms(mem_ctx, nir, - &cp->program, &prog_data.base, true); - - if (unlikely(brw->perf_debug)) { - start_busy = (brw->batch.last_bo && - brw_bo_busy(brw->batch.last_bo)); - start_time = get_time(); - } - - - brw_nir_lower_cs_intrinsics(nir); - - struct brw_compile_cs_params params = { - .nir = nir, - .key = key, - .prog_data = &prog_data, - .log_data = brw, - }; - - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) { - params.shader_time = true; - params.shader_time_index = - brw_get_shader_time_index(brw, &cp->program, ST_CS, true); - } - - program = brw_compile_cs(brw->screen->compiler, mem_ctx, ¶ms); - if (program == NULL) { - cp->program.sh.data->LinkStatus = LINKING_FAILURE; - ralloc_strcat(&cp->program.sh.data->InfoLog, params.error_str); - _mesa_problem(NULL, "Failed to compile compute shader: %s\n", params.error_str); - - ralloc_free(mem_ctx); - return false; - } - - if (unlikely(brw->perf_debug)) { - if (cp->compiled_once) { - brw_debug_recompile(brw, MESA_SHADER_COMPUTE, cp->program.Id, - &key->base); - } - cp->compiled_once = true; - - if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { - perf_debug("CS compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - } - - brw_alloc_stage_scratch(brw, &brw->cs.base, prog_data.base.total_scratch); - - /* The param and pull_param arrays will be freed by the shader cache. 
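- * Stealing them to the NULL ralloc context below detaches them from
- * mem_ctx, so the ralloc_free(mem_ctx) further down cannot free them out
- * from under the cache.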
*/ - ralloc_steal(NULL, prog_data.base.param); - ralloc_steal(NULL, prog_data.base.pull_param); - brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG, - key, sizeof(*key), - program, prog_data.base.program_size, - &prog_data, sizeof(prog_data), - &brw->cs.base.prog_offset, &brw->cs.base.prog_data); - ralloc_free(mem_ctx); - - return true; -} - - -void -brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key) -{ - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_COMPUTE_PROGRAM */ - const struct brw_program *cp = - (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE]; - - memset(key, 0, sizeof(*key)); - - /* _NEW_TEXTURE */ - brw_populate_base_prog_key(ctx, cp, &key->base); -} - - -void -brw_upload_cs_prog(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_cs_prog_key key; - struct brw_program *cp = - (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE]; - - if (!cp) - return; - - if (!brw_state_dirty(brw, _NEW_TEXTURE, BRW_NEW_COMPUTE_PROGRAM)) - return; - - brw->cs.base.sampler_count = - util_last_bit(ctx->ComputeProgram._Current->SamplersUsed); - - brw_cs_populate_key(brw, &key); - - if (brw_search_cache(&brw->cache, BRW_CACHE_CS_PROG, &key, sizeof(key), - &brw->cs.base.prog_offset, &brw->cs.base.prog_data, - true)) - return; - - if (brw_disk_cache_upload_program(brw, MESA_SHADER_COMPUTE)) - return; - - cp = (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE]; - cp->id = key.base.program_string_id; - - ASSERTED bool success = brw_codegen_cs_prog(brw, cp, &key); - assert(success); -} - -void -brw_cs_populate_default_key(const struct brw_compiler *compiler, - struct brw_cs_prog_key *key, - struct gl_program *prog) -{ - const struct intel_device_info *devinfo = compiler->devinfo; - memset(key, 0, sizeof(*key)); - brw_populate_default_base_prog_key(devinfo, brw_program(prog), &key->base); -} - -bool -brw_cs_precompile(struct gl_context *ctx, struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_cs_prog_key key; - - struct brw_program *bcp = brw_program(prog); - - brw_cs_populate_default_key(brw->screen->compiler, &key, prog); - - uint32_t old_prog_offset = brw->cs.base.prog_offset; - struct brw_stage_prog_data *old_prog_data = brw->cs.base.prog_data; - - bool success = brw_codegen_cs_prog(brw, bcp, &key); - - brw->cs.base.prog_offset = old_prog_offset; - brw->cs.base.prog_data = old_prog_data; - - return success; -} diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h deleted file mode 100644 index a0d43ab..0000000 --- a/src/mesa/drivers/dri/i965/brw_cs.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - - -#ifndef BRW_CS_H -#define BRW_CS_H - -#ifdef __cplusplus -extern "C" { -#endif - -void -brw_upload_cs_prog(struct brw_context *brw); - -void -brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key); -void -brw_cs_populate_default_key(const struct brw_compiler *compiler, - struct brw_cs_prog_key *key, - struct gl_program *prog); - -#ifdef __cplusplus -} -#endif - -#endif /* BRW_CS_H */ diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c deleted file mode 100644 index 87e99ef..0000000 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ /dev/null @@ -1,356 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -/** @file brw_curbe.c - * - * Push constant handling for gfx4/5. - * - * Push constants are constant values (such as GLSL uniforms) that are - * pre-loaded into a shader stage's register space at thread spawn time. On - * gfx4 and gfx5, we create a blob in memory containing all the push constants - * for all the stages in order. At CMD_CONST_BUFFER time that blob is loaded - * into URB space as a constant URB entry (CURBE) so that it can be accessed - * quickly at thread setup time. Each individual fixed function unit's state - * (brw_vs_state.c for example) tells the hardware which subset of the CURBE - * it wants in its register space, and we calculate those areas here under the - * BRW_NEW_PUSH_CONSTANT_ALLOCATION state flag. The brw_urb.c allocation will control - * how many CURBEs can be loaded into the hardware at once before a pipeline - * stall occurs at CMD_CONST_BUFFER time. - * - * On gfx6+, constant handling becomes a much simpler set of per-unit state. - * See gfx6_upload_vec4_push_constants() in gfx6_vs_state.c for that code. 
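- *
- * As an illustrative sketch (register counts invented for the example):
- * with 8 wm registers, 2 clip registers and 4 vs registers,
- * calculate_curbe_offsets() below lays the blob out as wm at [0, 8),
- * clip at [8, 10) and vs at [10, 14), for a total_size of 14 of the 32
- * available 512-bit URB units.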
- */
-
-
-#include "compiler/nir/nir.h"
-#include "main/context.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-#include "program/prog_statevars.h"
-#include "util/bitscan.h"
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "brw_util.h"
-#include "util/u_math.h"
-
-
-/**
- * Partition the CURBE between the various users of constant values.
- *
- * If the users all fit within the previous allocation, we avoid changing
- * the layout because that means reuploading all unit state and uploading new
- * constant buffers.
- */
-static void calculate_curbe_offsets( struct brw_context *brw )
-{
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_FS_PROG_DATA */
- const GLuint nr_fp_regs = (brw->wm.base.prog_data->nr_params + 15) / 16;
-
- /* BRW_NEW_VS_PROG_DATA */
- const GLuint nr_vp_regs = (brw->vs.base.prog_data->nr_params + 15) / 16;
- GLuint nr_clip_regs = 0;
- GLuint total_regs;
-
- /* _NEW_TRANSFORM */
- if (ctx->Transform.ClipPlanesEnabled) {
- GLuint nr_planes = 6 + util_bitcount(ctx->Transform.ClipPlanesEnabled);
- nr_clip_regs = (nr_planes * 4 + 15) / 16;
- }
-
-
- total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
-
- /* The CURBE allocation size is limited to 32 512-bit units (128 EU
- * registers, or 1024 floats). See CS_URB_STATE in the gfx4 or gfx5
- * (volume 1, part 1) PRMs.
- *
- * Note that in brw_fs.cpp we're only loading up to 16 EU registers of
- * values as push constants before spilling to pull constants, and in
- * brw_vec4.cpp we're loading up to 32 registers of push constants. An EU
- * register is 1/2 of one of these URB entry units, so that leaves us 16 EU
- * regs for clip.
- */
- assert(total_regs <= 32);
-
- /* Lazy resize:
- */
- if (nr_fp_regs > brw->curbe.wm_size ||
- nr_vp_regs > brw->curbe.vs_size ||
- nr_clip_regs != brw->curbe.clip_size ||
- (total_regs < brw->curbe.total_size / 4 &&
- brw->curbe.total_size > 16)) {
-
- GLuint reg = 0;
-
- /* Calculate a new layout:
- */
- reg = 0;
- brw->curbe.wm_start = reg;
- brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
- brw->curbe.clip_start = reg;
- brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
- brw->curbe.vs_start = reg;
- brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
- brw->curbe.total_size = reg;
-
- if (0)
- fprintf(stderr, "curbe wm %d+%d clip %d+%d vs %d+%d\n",
- brw->curbe.wm_start,
- brw->curbe.wm_size,
- brw->curbe.clip_start,
- brw->curbe.clip_size,
- brw->curbe.vs_start,
- brw->curbe.vs_size );
-
- brw->ctx.NewDriverState |= BRW_NEW_PUSH_CONSTANT_ALLOCATION;
- }
-}
-
-
-const struct brw_tracked_state brw_curbe_offsets = {
- .dirty = {
- .mesa = _NEW_TRANSFORM,
- .brw = BRW_NEW_CONTEXT |
- BRW_NEW_BLORP |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = calculate_curbe_offsets
-};
-
-
-
-
-/** Uploads the CS_URB_STATE packet.
- *
- * Just like brw_vs_state.c and brw_wm_state.c define a URB entry size and
- * number of entries for their stages, constant buffers do so using this state
- * packet. Having multiple CURBEs in the URB at the same time allows the
- * hardware to avoid a pipeline stall between primitives using different
- * constant buffer contents. 
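- *
- * (Concretely, the second dword emitted below packs the entry size minus
- * one at bit 4 and the number of entries in the bits below it.)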
- */
-void brw_upload_cs_urb_state(struct brw_context *brw)
-{
- BEGIN_BATCH(2);
- OUT_BATCH(CMD_CS_URB_STATE << 16 | (2-2));
-
- /* BRW_NEW_URB_FENCE */
- if (brw->urb.csize == 0) {
- OUT_BATCH(0);
- } else {
- /* BRW_NEW_URB_FENCE */
- assert(brw->urb.nr_cs_entries);
- OUT_BATCH((brw->urb.csize - 1) << 4 | brw->urb.nr_cs_entries);
- }
- ADVANCE_BATCH();
-}
-
-static const GLfloat fixed_plane[6][4] = {
- { 0, 0, -1, 1 },
- { 0, 0, 1, 1 },
- { 0, -1, 0, 1 },
- { 0, 1, 0, 1 },
- {-1, 0, 0, 1 },
- { 1, 0, 0, 1 }
-};
-
-/**
- * Gathers together all the uniform values into a block of memory to be
- * uploaded into the CURBE, then emits the state packet telling the hardware
- * the new location.
- */
-static void
-brw_upload_constant_buffer(struct brw_context *brw)
-{
- const struct intel_device_info *devinfo = &brw->screen->devinfo;
- struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
- const GLuint sz = brw->curbe.total_size;
- const GLuint bufsz = sz * 16 * sizeof(GLfloat);
- gl_constant_value *buf;
- GLuint i;
- gl_clip_plane *clip_planes;
-
- /* BRW_NEW_FRAGMENT_PROGRAM */
- struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
-
- /* BRW_NEW_VERTEX_PROGRAM */
- struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];
-
- if (sz == 0) {
- goto emit;
- }
-
- buf = brw_upload_space(&brw->upload, bufsz, 64,
- &brw->curbe.curbe_bo, &brw->curbe.curbe_offset);
-
- STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
-
- /* fragment shader constants */
- if (brw->curbe.wm_size) {
- _mesa_load_state_parameters(ctx, fp->Parameters);
-
- /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
- GLuint offset = brw->curbe.wm_start * 16;
-
- /* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
- brw_populate_constant_data(brw, fp, &brw->wm.base, &buf[offset],
- brw->wm.base.prog_data->param,
- brw->wm.base.prog_data->nr_params);
- }
-
- /* clipper constants */
- if (brw->curbe.clip_size) {
- GLuint offset = brw->curbe.clip_start * 16;
- GLbitfield mask;
-
- /* If any planes are going this way, send them all this way:
- */
- for (i = 0; i < 6; i++) {
- buf[offset + i * 4 + 0].f = fixed_plane[i][0];
- buf[offset + i * 4 + 1].f = fixed_plane[i][1];
- buf[offset + i * 4 + 2].f = fixed_plane[i][2];
- buf[offset + i * 4 + 3].f = fixed_plane[i][3];
- }
-
- /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
- * clip-space:
- */
- clip_planes = brw_select_clip_planes(ctx);
- mask = ctx->Transform.ClipPlanesEnabled;
- while (mask) {
- const int j = u_bit_scan(&mask);
- buf[offset + i * 4 + 0].f = clip_planes[j][0];
- buf[offset + i * 4 + 1].f = clip_planes[j][1];
- buf[offset + i * 4 + 2].f = clip_planes[j][2];
- buf[offset + i * 4 + 3].f = clip_planes[j][3];
- i++;
- }
- }
-
- /* vertex shader constants */
- if (brw->curbe.vs_size) {
- _mesa_load_state_parameters(ctx, vp->Parameters);
-
- GLuint offset = brw->curbe.vs_start * 16;
-
- /* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
- brw_populate_constant_data(brw, vp, &brw->vs.base, &buf[offset],
- brw->vs.base.prog_data->param,
- brw->vs.base.prog_data->nr_params);
- }
-
- if (0) {
- for (i = 0; i < sz*16; i+=4)
- fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
- buf[i+0].f, buf[i+1].f, buf[i+2].f, buf[i+3].f);
- }
-
- /* Because this provokes an action (i.e. copy the constants into the
- * URB), it shouldn't be short-circuited if identical to the
- * previous time - because e.g. the urb destination may have
- * changed, or the urb contents may differ from last time. 
- *
- * Note that the data referred to is actually copied internally,
- * not just used in place according to the passed pointer.
- *
- * It appears that the CS unit takes care of using each available
- * URB entry (Const URB Entry == CURBE) in turn, and issuing
- * flushes as necessary when double-buffering of CURBEs isn't
- * possible.
- */
-
-emit:
- /* BRW_NEW_URB_FENCE: From the gfx4 PRM, volume 1, section 3.9.8
- * (CONSTANT_BUFFER (CURBE Load)):
- *
- * "Modifying the CS URB allocation via URB_FENCE invalidates any
- * previous CURBE entries. Therefore software must subsequently
- * [re]issue a CONSTANT_BUFFER command before CURBE data can be used
- * in the pipeline."
- */
- BEGIN_BATCH(2);
- if (brw->curbe.total_size == 0) {
- OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
- OUT_BATCH(0);
- } else {
- OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
- OUT_RELOC(brw->curbe.curbe_bo, 0,
- (brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
- }
- ADVANCE_BATCH();
-
- /* Work around a Broadwater/Crestline depth interpolator bug. The
- * following sequence will cause GPU hangs:
- *
- * 1. Change state so that all depth related fields in CC_STATE are
- * disabled, and in WM_STATE, only "PS Use Source Depth" is enabled.
- * 2. Emit a CONSTANT_BUFFER packet.
- * 3. Draw via 3DPRIMITIVE.
- *
- * The recommended workaround is to emit a non-pipelined state change after
- * emitting CONSTANT_BUFFER, in order to drain the windowizer pipeline.
- *
- * We arbitrarily choose 3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP (as it's small),
- * and always emit it when "PS Use Source Depth" is set. We could be more
- * precise, but the additional complexity is probably not worth it.
- *
- * BRW_NEW_FRAGMENT_PROGRAM
- */
- if (devinfo->verx10 == 40 &&
- BITSET_TEST(fp->info.system_values_read, SYSTEM_VALUE_FRAG_COORD)) {
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
- OUT_BATCH(0);
- ADVANCE_BATCH();
- }
-}
-
-const struct brw_tracked_state brw_constant_buffer = {
- .dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS,
- .brw = BRW_NEW_BATCH |
- BRW_NEW_BLORP |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION |
- BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
- BRW_NEW_URB_FENCE |
- BRW_NEW_VS_PROG_DATA,
- },
- .emit = brw_upload_constant_buffer,
-};
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
deleted file mode 100644
index a548419..0000000
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ /dev/null
@@ -1,1668 +0,0 @@
-/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#ifndef BRW_DEFINES_H -#define BRW_DEFINES_H - -#include "util/macros.h" - -#define INTEL_MASK(high, low) (((1u<<((high)-(low)+1))-1)<<(low)) -/* Using the GNU statement expression extension */ -#define SET_FIELD(value, field) \ - ({ \ - uint32_t fieldval = (uint32_t)(value) << field ## _SHIFT; \ - assert((fieldval & ~ field ## _MASK) == 0); \ - fieldval & field ## _MASK; \ - }) - -#define GET_BITS(data, high, low) ((data & INTEL_MASK((high), (low))) >> (low)) -#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) - -/** - * For use with masked MMIO registers where the upper 16 bits control which - * of the lower bits are committed to the register. - */ -#define REG_MASK(value) ((value) << 16) - -/* 3D state: - */ -#define CMD_3D_PRIM 0x7b00 /* 3DPRIMITIVE */ -/* DW0 */ -# define GFX4_3DPRIM_TOPOLOGY_TYPE_SHIFT 10 -# define GFX4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15) -# define GFX4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15) -# define GFX7_3DPRIM_INDIRECT_PARAMETER_ENABLE (1 << 10) -# define GFX7_3DPRIM_PREDICATE_ENABLE (1 << 8) -/* DW1 */ -# define GFX7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) -# define GFX7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) - -#define BRW_ANISORATIO_2 0 -#define BRW_ANISORATIO_4 1 -#define BRW_ANISORATIO_6 2 -#define BRW_ANISORATIO_8 3 -#define BRW_ANISORATIO_10 4 -#define BRW_ANISORATIO_12 5 -#define BRW_ANISORATIO_14 6 -#define BRW_ANISORATIO_16 7 - -#define BRW_BLENDFACTOR_ONE 0x1 -#define BRW_BLENDFACTOR_SRC_COLOR 0x2 -#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 -#define BRW_BLENDFACTOR_DST_ALPHA 0x4 -#define BRW_BLENDFACTOR_DST_COLOR 0x5 -#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 -#define BRW_BLENDFACTOR_CONST_COLOR 0x7 -#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 -#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 -#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A -#define BRW_BLENDFACTOR_ZERO 0x11 -#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 -#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 -#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 -#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 -#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 -#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 -#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 -#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A - -#define BRW_BLENDFUNCTION_ADD 0 -#define BRW_BLENDFUNCTION_SUBTRACT 1 -#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 -#define BRW_BLENDFUNCTION_MIN 3 -#define BRW_BLENDFUNCTION_MAX 4 - -#define BRW_ALPHATEST_FORMAT_UNORM8 0 -#define BRW_ALPHATEST_FORMAT_FLOAT32 1 - -#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 -#define BRW_CHROMAKEY_REPLACE_BLACK 1 - -#define BRW_CLIP_API_OGL 0 -#define BRW_CLIP_API_DX 1 - -#define BRW_CLIP_NDCSPACE 0 -#define BRW_CLIP_SCREENSPACE 1 - -#define BRW_COMPAREFUNCTION_ALWAYS 0 -#define BRW_COMPAREFUNCTION_NEVER 1 -#define BRW_COMPAREFUNCTION_LESS 2 -#define BRW_COMPAREFUNCTION_EQUAL 3 -#define BRW_COMPAREFUNCTION_LEQUAL 4 -#define BRW_COMPAREFUNCTION_GREATER 5 -#define BRW_COMPAREFUNCTION_NOTEQUAL 6 -#define BRW_COMPAREFUNCTION_GEQUAL 7 - -#define BRW_COVERAGE_PIXELS_HALF 0 -#define BRW_COVERAGE_PIXELS_1 1 -#define BRW_COVERAGE_PIXELS_2 2 -#define BRW_COVERAGE_PIXELS_4 3 - 
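-/* Worked example for the field helpers above (the values are purely
- * illustrative): INTEL_MASK(26, 18) expands to the nine-bit mask
- * 0x07fc0000, so SET_FIELD(fmt, BRW_SURFACE_FORMAT) shifts fmt up to bit
- * 18 and asserts that it fits, while GET_FIELD(dw0, BRW_SURFACE_FORMAT)
- * recovers it again.
- */
-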
-#define BRW_CULLMODE_BOTH 0 -#define BRW_CULLMODE_NONE 1 -#define BRW_CULLMODE_FRONT 2 -#define BRW_CULLMODE_BACK 3 - -#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 -#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 - -#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 -#define BRW_DEPTHFORMAT_D32_FLOAT 1 -#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 -#define BRW_DEPTHFORMAT_D24_UNORM_X8_UINT 3 /* GFX5 */ -#define BRW_DEPTHFORMAT_D16_UNORM 5 - -#define BRW_FLOATING_POINT_IEEE_754 0 -#define BRW_FLOATING_POINT_NON_IEEE_754 1 - -#define BRW_FRONTWINDING_CW 0 -#define BRW_FRONTWINDING_CCW 1 - -#define BRW_CUT_INDEX_ENABLE (1 << 10) - -#define BRW_INDEX_BYTE 0 -#define BRW_INDEX_WORD 1 -#define BRW_INDEX_DWORD 2 - -#define BRW_LOGICOPFUNCTION_CLEAR 0 -#define BRW_LOGICOPFUNCTION_NOR 1 -#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 -#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 -#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 -#define BRW_LOGICOPFUNCTION_INVERT 5 -#define BRW_LOGICOPFUNCTION_XOR 6 -#define BRW_LOGICOPFUNCTION_NAND 7 -#define BRW_LOGICOPFUNCTION_AND 8 -#define BRW_LOGICOPFUNCTION_EQUIV 9 -#define BRW_LOGICOPFUNCTION_NOOP 10 -#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 -#define BRW_LOGICOPFUNCTION_COPY 12 -#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 -#define BRW_LOGICOPFUNCTION_OR 14 -#define BRW_LOGICOPFUNCTION_SET 15 - -#define BRW_MAPFILTER_NEAREST 0x0 -#define BRW_MAPFILTER_LINEAR 0x1 -#define BRW_MAPFILTER_ANISOTROPIC 0x2 - -#define BRW_MIPFILTER_NONE 0 -#define BRW_MIPFILTER_NEAREST 1 -#define BRW_MIPFILTER_LINEAR 3 - -#define BRW_ADDRESS_ROUNDING_ENABLE_U_MAG 0x20 -#define BRW_ADDRESS_ROUNDING_ENABLE_U_MIN 0x10 -#define BRW_ADDRESS_ROUNDING_ENABLE_V_MAG 0x08 -#define BRW_ADDRESS_ROUNDING_ENABLE_V_MIN 0x04 -#define BRW_ADDRESS_ROUNDING_ENABLE_R_MAG 0x02 -#define BRW_ADDRESS_ROUNDING_ENABLE_R_MIN 0x01 - -#define BRW_PREFILTER_ALWAYS 0x0 -#define BRW_PREFILTER_NEVER 0x1 -#define BRW_PREFILTER_LESS 0x2 -#define BRW_PREFILTER_EQUAL 0x3 -#define BRW_PREFILTER_LEQUAL 0x4 -#define BRW_PREFILTER_GREATER 0x5 -#define BRW_PREFILTER_NOTEQUAL 0x6 -#define BRW_PREFILTER_GEQUAL 0x7 - -#define BRW_PROVOKING_VERTEX_0 0 -#define BRW_PROVOKING_VERTEX_1 1 -#define BRW_PROVOKING_VERTEX_2 2 - -#define BRW_RASTRULE_UPPER_LEFT 0 -#define BRW_RASTRULE_UPPER_RIGHT 1 -/* These are listed as "Reserved, but not seen as useful" - * in Intel documentation (page 212, "Point Rasterization Rule", - * section 7.4 "SF Pipeline State Summary", of document - * "Intel® 965 Express Chipset Family and Intel® G35 Express - * Chipset Graphics Controller Programmer's Reference Manual, - * Volume 2: 3D/Media", Revision 1.0b as of January 2008, - * available at - * https://01.org/linuxgraphics/documentation/hardware-specification-prms - * at the time of this writing). - * - * These appear to be supported on at least some - * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT - * is useful when using OpenGL to render to a FBO - * (which has the pixel coordinate Y orientation inverted - * with respect to the normal OpenGL pixel coordinate system). 
- */ -#define BRW_RASTRULE_LOWER_LEFT 2 -#define BRW_RASTRULE_LOWER_RIGHT 3 - -#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 -#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 -#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 - -#define BRW_STENCILOP_KEEP 0 -#define BRW_STENCILOP_ZERO 1 -#define BRW_STENCILOP_REPLACE 2 -#define BRW_STENCILOP_INCRSAT 3 -#define BRW_STENCILOP_DECRSAT 4 -#define BRW_STENCILOP_INCR 5 -#define BRW_STENCILOP_DECR 6 -#define BRW_STENCILOP_INVERT 7 - -/* Surface state DW0 */ -#define GFX8_SURFACE_IS_ARRAY (1 << 28) -#define GFX8_SURFACE_VALIGN_4 (1 << 16) -#define GFX8_SURFACE_VALIGN_8 (2 << 16) -#define GFX8_SURFACE_VALIGN_16 (3 << 16) -#define GFX8_SURFACE_HALIGN_4 (1 << 14) -#define GFX8_SURFACE_HALIGN_8 (2 << 14) -#define GFX8_SURFACE_HALIGN_16 (3 << 14) -#define GFX8_SURFACE_TILING_NONE (0 << 12) -#define GFX8_SURFACE_TILING_W (1 << 12) -#define GFX8_SURFACE_TILING_X (2 << 12) -#define GFX8_SURFACE_TILING_Y (3 << 12) -#define GFX8_SURFACE_SAMPLER_L2_BYPASS_DISABLE (1 << 9) -#define BRW_SURFACE_RC_READ_WRITE (1 << 8) -#define BRW_SURFACE_MIPLAYOUT_SHIFT 10 -#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 -#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 -#define BRW_SURFACE_CUBEFACE_ENABLES 0x3f -#define BRW_SURFACE_BLEND_ENABLED (1 << 13) -#define BRW_SURFACE_WRITEDISABLE_B_SHIFT 14 -#define BRW_SURFACE_WRITEDISABLE_G_SHIFT 15 -#define BRW_SURFACE_WRITEDISABLE_R_SHIFT 16 -#define BRW_SURFACE_WRITEDISABLE_A_SHIFT 17 - -#define GFX9_SURFACE_ASTC_HDR_FORMAT_BIT 0x100 - -#define BRW_SURFACE_FORMAT_SHIFT 18 -#define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18) - -#define BRW_SURFACERETURNFORMAT_FLOAT32 0 -#define BRW_SURFACERETURNFORMAT_S1 1 - -#define BRW_SURFACE_TYPE_SHIFT 29 -#define BRW_SURFACE_TYPE_MASK INTEL_MASK(31, 29) -#define BRW_SURFACE_1D 0 -#define BRW_SURFACE_2D 1 -#define BRW_SURFACE_3D 2 -#define BRW_SURFACE_CUBE 3 -#define BRW_SURFACE_BUFFER 4 -#define BRW_SURFACE_NULL 7 - -#define GFX7_SURFACE_IS_ARRAY (1 << 28) -#define GFX7_SURFACE_VALIGN_2 (0 << 16) -#define GFX7_SURFACE_VALIGN_4 (1 << 16) -#define GFX7_SURFACE_HALIGN_4 (0 << 15) -#define GFX7_SURFACE_HALIGN_8 (1 << 15) -#define GFX7_SURFACE_TILING_NONE (0 << 13) -#define GFX7_SURFACE_TILING_X (2 << 13) -#define GFX7_SURFACE_TILING_Y (3 << 13) -#define GFX7_SURFACE_ARYSPC_FULL (0 << 10) -#define GFX7_SURFACE_ARYSPC_LOD0 (1 << 10) - -/* Surface state DW2 */ -#define BRW_SURFACE_HEIGHT_SHIFT 19 -#define BRW_SURFACE_HEIGHT_MASK INTEL_MASK(31, 19) -#define BRW_SURFACE_WIDTH_SHIFT 6 -#define BRW_SURFACE_WIDTH_MASK INTEL_MASK(18, 6) -#define BRW_SURFACE_LOD_SHIFT 2 -#define BRW_SURFACE_LOD_MASK INTEL_MASK(5, 2) -#define GFX7_SURFACE_HEIGHT_SHIFT 16 -#define GFX7_SURFACE_HEIGHT_MASK INTEL_MASK(29, 16) -#define GFX7_SURFACE_WIDTH_SHIFT 0 -#define GFX7_SURFACE_WIDTH_MASK INTEL_MASK(13, 0) - -/* Surface state DW3 */ -#define BRW_SURFACE_DEPTH_SHIFT 21 -#define BRW_SURFACE_DEPTH_MASK INTEL_MASK(31, 21) -#define BRW_SURFACE_PITCH_SHIFT 3 -#define BRW_SURFACE_PITCH_MASK INTEL_MASK(19, 3) -#define BRW_SURFACE_TILED (1 << 1) -#define BRW_SURFACE_TILED_Y (1 << 0) -#define HSW_SURFACE_IS_INTEGER_FORMAT (1 << 18) - -/* Surface state DW4 */ -#define BRW_SURFACE_MIN_LOD_SHIFT 28 -#define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28) -#define BRW_SURFACE_MIN_ARRAY_ELEMENT_SHIFT 17 -#define BRW_SURFACE_MIN_ARRAY_ELEMENT_MASK INTEL_MASK(27, 17) -#define BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT 8 -#define BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT_MASK INTEL_MASK(16, 8) -#define BRW_SURFACE_MULTISAMPLECOUNT_1 (0 << 4) -#define 
BRW_SURFACE_MULTISAMPLECOUNT_4 (2 << 4) -#define GFX7_SURFACE_MULTISAMPLECOUNT_1 (0 << 3) -#define GFX8_SURFACE_MULTISAMPLECOUNT_2 (1 << 3) -#define GFX7_SURFACE_MULTISAMPLECOUNT_4 (2 << 3) -#define GFX7_SURFACE_MULTISAMPLECOUNT_8 (3 << 3) -#define GFX8_SURFACE_MULTISAMPLECOUNT_16 (4 << 3) -#define GFX7_SURFACE_MSFMT_MSS (0 << 6) -#define GFX7_SURFACE_MSFMT_DEPTH_STENCIL (1 << 6) -#define GFX7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT 18 -#define GFX7_SURFACE_MIN_ARRAY_ELEMENT_MASK INTEL_MASK(28, 18) -#define GFX7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT 7 -#define GFX7_SURFACE_RENDER_TARGET_VIEW_EXTENT_MASK INTEL_MASK(17, 7) - -/* Surface state DW5 */ -#define BRW_SURFACE_X_OFFSET_SHIFT 25 -#define BRW_SURFACE_X_OFFSET_MASK INTEL_MASK(31, 25) -#define BRW_SURFACE_VERTICAL_ALIGN_ENABLE (1 << 24) -#define BRW_SURFACE_Y_OFFSET_SHIFT 20 -#define BRW_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 20) -#define GFX7_SURFACE_MIN_LOD_SHIFT 4 -#define GFX7_SURFACE_MIN_LOD_MASK INTEL_MASK(7, 4) -#define GFX8_SURFACE_Y_OFFSET_SHIFT 21 -#define GFX8_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 21) - -#define GFX9_SURFACE_MIP_TAIL_START_LOD_SHIFT 8 -#define GFX9_SURFACE_MIP_TAIL_START_LOD_MASK INTEL_MASK(11, 8) - -/* Surface state DW6 */ -#define GFX7_SURFACE_MCS_ENABLE (1 << 0) -#define GFX7_SURFACE_MCS_PITCH_SHIFT 3 -#define GFX7_SURFACE_MCS_PITCH_MASK INTEL_MASK(11, 3) -#define GFX8_SURFACE_AUX_QPITCH_SHIFT 16 -#define GFX8_SURFACE_AUX_QPITCH_MASK INTEL_MASK(30, 16) -#define GFX8_SURFACE_AUX_PITCH_SHIFT 3 -#define GFX8_SURFACE_AUX_PITCH_MASK INTEL_MASK(11, 3) -#define GFX8_SURFACE_AUX_MODE_MASK INTEL_MASK(2, 0) - -#define GFX8_SURFACE_AUX_MODE_NONE 0 -#define GFX8_SURFACE_AUX_MODE_MCS 1 -#define GFX8_SURFACE_AUX_MODE_APPEND 2 -#define GFX8_SURFACE_AUX_MODE_HIZ 3 -#define GFX9_SURFACE_AUX_MODE_CCS_E 5 - -/* Surface state DW7 */ -#define GFX9_SURFACE_RT_COMPRESSION_SHIFT 30 -#define GFX9_SURFACE_RT_COMPRESSION_MASK INTEL_MASK(30, 30) -#define GFX7_SURFACE_CLEAR_COLOR_SHIFT 28 -#define GFX7_SURFACE_SCS_R_SHIFT 25 -#define GFX7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25) -#define GFX7_SURFACE_SCS_G_SHIFT 22 -#define GFX7_SURFACE_SCS_G_MASK INTEL_MASK(24, 22) -#define GFX7_SURFACE_SCS_B_SHIFT 19 -#define GFX7_SURFACE_SCS_B_MASK INTEL_MASK(21, 19) -#define GFX7_SURFACE_SCS_A_SHIFT 16 -#define GFX7_SURFACE_SCS_A_MASK INTEL_MASK(18, 16) - -/* The actual swizzle values/what channel to use */ -#define HSW_SCS_ZERO 0 -#define HSW_SCS_ONE 1 -#define HSW_SCS_RED 4 -#define HSW_SCS_GREEN 5 -#define HSW_SCS_BLUE 6 -#define HSW_SCS_ALPHA 7 - -/* SAMPLER_STATE DW0 */ -#define BRW_SAMPLER_DISABLE (1 << 31) -#define BRW_SAMPLER_LOD_PRECLAMP_ENABLE (1 << 28) -#define GFX6_SAMPLER_MIN_MAG_NOT_EQUAL (1 << 27) /* Gfx6 only */ -#define BRW_SAMPLER_BASE_MIPLEVEL_MASK INTEL_MASK(26, 22) -#define BRW_SAMPLER_BASE_MIPLEVEL_SHIFT 22 -#define BRW_SAMPLER_MIP_FILTER_MASK INTEL_MASK(21, 20) -#define BRW_SAMPLER_MIP_FILTER_SHIFT 20 -#define BRW_SAMPLER_MAG_FILTER_MASK INTEL_MASK(19, 17) -#define BRW_SAMPLER_MAG_FILTER_SHIFT 17 -#define BRW_SAMPLER_MIN_FILTER_MASK INTEL_MASK(16, 14) -#define BRW_SAMPLER_MIN_FILTER_SHIFT 14 -#define GFX4_SAMPLER_LOD_BIAS_MASK INTEL_MASK(13, 3) -#define GFX4_SAMPLER_LOD_BIAS_SHIFT 3 -#define GFX4_SAMPLER_SHADOW_FUNCTION_MASK INTEL_MASK(2, 0) -#define GFX4_SAMPLER_SHADOW_FUNCTION_SHIFT 0 - -#define GFX7_SAMPLER_LOD_BIAS_MASK INTEL_MASK(13, 1) -#define GFX7_SAMPLER_LOD_BIAS_SHIFT 1 -#define GFX7_SAMPLER_EWA_ANISOTROPIC_ALGORITHM (1 << 0) - -/* SAMPLER_STATE DW1 */ -#define GFX4_SAMPLER_MIN_LOD_MASK INTEL_MASK(31, 22) -#define 
GFX4_SAMPLER_MIN_LOD_SHIFT 22 -#define GFX4_SAMPLER_MAX_LOD_MASK INTEL_MASK(21, 12) -#define GFX4_SAMPLER_MAX_LOD_SHIFT 12 -#define GFX4_SAMPLER_CUBE_CONTROL_OVERRIDE (1 << 9) -/* Wrap modes are in DW1 on Gfx4-6 and DW3 on Gfx7+ */ -#define BRW_SAMPLER_TCX_WRAP_MODE_MASK INTEL_MASK(8, 6) -#define BRW_SAMPLER_TCX_WRAP_MODE_SHIFT 6 -#define BRW_SAMPLER_TCY_WRAP_MODE_MASK INTEL_MASK(5, 3) -#define BRW_SAMPLER_TCY_WRAP_MODE_SHIFT 3 -#define BRW_SAMPLER_TCZ_WRAP_MODE_MASK INTEL_MASK(2, 0) -#define BRW_SAMPLER_TCZ_WRAP_MODE_SHIFT 0 - -#define GFX7_SAMPLER_MIN_LOD_MASK INTEL_MASK(31, 20) -#define GFX7_SAMPLER_MIN_LOD_SHIFT 20 -#define GFX7_SAMPLER_MAX_LOD_MASK INTEL_MASK(19, 8) -#define GFX7_SAMPLER_MAX_LOD_SHIFT 8 -#define GFX7_SAMPLER_SHADOW_FUNCTION_MASK INTEL_MASK(3, 1) -#define GFX7_SAMPLER_SHADOW_FUNCTION_SHIFT 1 -#define GFX7_SAMPLER_CUBE_CONTROL_OVERRIDE (1 << 0) - -/* SAMPLER_STATE DW2 - border color pointer */ - -/* SAMPLER_STATE DW3 */ -#define BRW_SAMPLER_MAX_ANISOTROPY_MASK INTEL_MASK(21, 19) -#define BRW_SAMPLER_MAX_ANISOTROPY_SHIFT 19 -#define BRW_SAMPLER_ADDRESS_ROUNDING_MASK INTEL_MASK(18, 13) -#define BRW_SAMPLER_ADDRESS_ROUNDING_SHIFT 13 -#define GFX7_SAMPLER_NON_NORMALIZED_COORDINATES (1 << 10) -/* Gfx7+ wrap modes reuse the same BRW_SAMPLER_TC*_WRAP_MODE enums. */ -#define GFX6_SAMPLER_NON_NORMALIZED_COORDINATES (1 << 0) - -enum brw_wrap_mode { - BRW_TEXCOORDMODE_WRAP = 0, - BRW_TEXCOORDMODE_MIRROR = 1, - BRW_TEXCOORDMODE_CLAMP = 2, - BRW_TEXCOORDMODE_CUBE = 3, - BRW_TEXCOORDMODE_CLAMP_BORDER = 4, - BRW_TEXCOORDMODE_MIRROR_ONCE = 5, - GFX8_TEXCOORDMODE_HALF_BORDER = 6, -}; - -#define BRW_THREAD_PRIORITY_NORMAL 0 -#define BRW_THREAD_PRIORITY_HIGH 1 - -#define BRW_TILEWALK_XMAJOR 0 -#define BRW_TILEWALK_YMAJOR 1 - -#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 -#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 - - -#define CMD_URB_FENCE 0x6000 -#define CMD_CS_URB_STATE 0x6001 -#define CMD_CONST_BUFFER 0x6002 - -#define CMD_STATE_BASE_ADDRESS 0x6101 -#define CMD_STATE_SIP 0x6102 -#define CMD_PIPELINE_SELECT_965 0x6104 -#define CMD_PIPELINE_SELECT_GM45 0x6904 - -#define _3DSTATE_PIPELINED_POINTERS 0x7800 -#define _3DSTATE_BINDING_TABLE_POINTERS 0x7801 -# define GFX6_BINDING_TABLE_MODIFY_VS (1 << 8) -# define GFX6_BINDING_TABLE_MODIFY_GS (1 << 9) -# define GFX6_BINDING_TABLE_MODIFY_PS (1 << 12) - -#define _3DSTATE_BINDING_TABLE_POINTERS_VS 0x7826 /* GFX7+ */ -#define _3DSTATE_BINDING_TABLE_POINTERS_HS 0x7827 /* GFX7+ */ -#define _3DSTATE_BINDING_TABLE_POINTERS_DS 0x7828 /* GFX7+ */ -#define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GFX7+ */ -#define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GFX7+ */ - -#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GFX6+ */ -# define PS_SAMPLER_STATE_CHANGE (1 << 12) -# define GS_SAMPLER_STATE_CHANGE (1 << 9) -# define VS_SAMPLER_STATE_CHANGE (1 << 8) -/* DW1: VS */ -/* DW2: GS */ -/* DW3: PS */ - -#define _3DSTATE_SAMPLER_STATE_POINTERS_VS 0x782B /* GFX7+ */ -#define _3DSTATE_SAMPLER_STATE_POINTERS_HS 0x782C /* GFX7+ */ -#define _3DSTATE_SAMPLER_STATE_POINTERS_DS 0x782D /* GFX7+ */ -#define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782E /* GFX7+ */ -#define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782F /* GFX7+ */ - -#define _3DSTATE_VERTEX_BUFFERS 0x7808 -# define BRW_VB0_INDEX_SHIFT 27 -# define GFX6_VB0_INDEX_SHIFT 26 -# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) -# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) -# define GFX6_VB0_ACCESS_VERTEXDATA (0 << 20) -# define GFX6_VB0_ACCESS_INSTANCEDATA (1 << 20) -# define GFX7_VB0_ADDRESS_MODIFYENABLE (1 
<< 14) -# define BRW_VB0_PITCH_SHIFT 0 - -#define _3DSTATE_VERTEX_ELEMENTS 0x7809 -# define BRW_VE0_INDEX_SHIFT 27 -# define GFX6_VE0_INDEX_SHIFT 26 -# define BRW_VE0_FORMAT_SHIFT 16 -# define BRW_VE0_VALID (1 << 26) -# define GFX6_VE0_VALID (1 << 25) -# define GFX6_VE0_EDGE_FLAG_ENABLE (1 << 15) -# define BRW_VE0_SRC_OFFSET_SHIFT 0 -# define BRW_VE1_COMPONENT_NOSTORE 0 -# define BRW_VE1_COMPONENT_STORE_SRC 1 -# define BRW_VE1_COMPONENT_STORE_0 2 -# define BRW_VE1_COMPONENT_STORE_1_FLT 3 -# define BRW_VE1_COMPONENT_STORE_1_INT 4 -# define BRW_VE1_COMPONENT_STORE_VID 5 -# define BRW_VE1_COMPONENT_STORE_IID 6 -# define BRW_VE1_COMPONENT_STORE_PID 7 -# define BRW_VE1_COMPONENT_0_SHIFT 28 -# define BRW_VE1_COMPONENT_1_SHIFT 24 -# define BRW_VE1_COMPONENT_2_SHIFT 20 -# define BRW_VE1_COMPONENT_3_SHIFT 16 -# define BRW_VE1_DST_OFFSET_SHIFT 0 - -#define CMD_INDEX_BUFFER 0x780a -#define GFX4_3DSTATE_VF_STATISTICS 0x780b -#define GM45_3DSTATE_VF_STATISTICS 0x680b -#define _3DSTATE_CC_STATE_POINTERS 0x780e /* GFX6+ */ -#define _3DSTATE_BLEND_STATE_POINTERS 0x7824 /* GFX7+ */ -#define _3DSTATE_DEPTH_STENCIL_STATE_POINTERS 0x7825 /* GFX7+ */ - -#define _3DSTATE_URB 0x7805 /* GFX6 */ -# define GFX6_URB_VS_SIZE_SHIFT 16 -# define GFX6_URB_VS_ENTRIES_SHIFT 0 -# define GFX6_URB_GS_ENTRIES_SHIFT 8 -# define GFX6_URB_GS_SIZE_SHIFT 0 - -#define _3DSTATE_VF 0x780c /* GFX7.5+ */ -#define HSW_CUT_INDEX_ENABLE (1 << 8) - -#define _3DSTATE_VF_INSTANCING 0x7849 /* GFX8+ */ -# define GFX8_VF_INSTANCING_ENABLE (1 << 8) - -#define _3DSTATE_VF_SGVS 0x784a /* GFX8+ */ -# define GFX8_SGVS_ENABLE_INSTANCE_ID (1 << 31) -# define GFX8_SGVS_INSTANCE_ID_COMPONENT_SHIFT 29 -# define GFX8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT 16 -# define GFX8_SGVS_ENABLE_VERTEX_ID (1 << 15) -# define GFX8_SGVS_VERTEX_ID_COMPONENT_SHIFT 13 -# define GFX8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT 0 - -#define _3DSTATE_VF_TOPOLOGY 0x784b /* GFX8+ */ - -#define _3DSTATE_WM_CHROMAKEY 0x784c /* GFX8+ */ - -#define _3DSTATE_URB_VS 0x7830 /* GFX7+ */ -#define _3DSTATE_URB_HS 0x7831 /* GFX7+ */ -#define _3DSTATE_URB_DS 0x7832 /* GFX7+ */ -#define _3DSTATE_URB_GS 0x7833 /* GFX7+ */ -# define GFX7_URB_ENTRY_SIZE_SHIFT 16 -# define GFX7_URB_STARTING_ADDRESS_SHIFT 25 - -#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GFX7+ */ -#define _3DSTATE_PUSH_CONSTANT_ALLOC_HS 0x7913 /* GFX7+ */ -#define _3DSTATE_PUSH_CONSTANT_ALLOC_DS 0x7914 /* GFX7+ */ -#define _3DSTATE_PUSH_CONSTANT_ALLOC_GS 0x7915 /* GFX7+ */ -#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS 0x7916 /* GFX7+ */ -# define GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 - -#define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GFX6+ */ -# define GFX6_CC_VIEWPORT_MODIFY (1 << 12) -# define GFX6_SF_VIEWPORT_MODIFY (1 << 11) -# define GFX6_CLIP_VIEWPORT_MODIFY (1 << 10) -# define GFX6_NUM_VIEWPORTS 16 - -#define _3DSTATE_VIEWPORT_STATE_POINTERS_CC 0x7823 /* GFX7+ */ -#define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL 0x7821 /* GFX7+ */ - -#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f /* GFX6+ */ - -#define _3DSTATE_VS 0x7810 /* GFX6+ */ -/* DW2 */ -# define GFX6_VS_SPF_MODE (1 << 31) -# define GFX6_VS_VECTOR_MASK_ENABLE (1 << 30) -# define GFX6_VS_SAMPLER_COUNT_SHIFT 27 -# define GFX6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -# define GFX6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -# define GFX6_VS_FLOATING_POINT_MODE_ALT (1 << 16) -# define HSW_VS_UAV_ACCESS_ENABLE (1 << 12) -/* DW4 */ -# define GFX6_VS_DISPATCH_START_GRF_SHIFT 20 -# define GFX6_VS_URB_READ_LENGTH_SHIFT 11 -# define GFX6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4 -/* DW5 
*/ -# define GFX6_VS_MAX_THREADS_SHIFT 25 -# define HSW_VS_MAX_THREADS_SHIFT 23 -# define GFX6_VS_STATISTICS_ENABLE (1 << 10) -# define GFX6_VS_CACHE_DISABLE (1 << 1) -# define GFX6_VS_ENABLE (1 << 0) -/* Gfx8+ DW7 */ -# define GFX8_VS_SIMD8_ENABLE (1 << 2) -/* Gfx8+ DW8 */ -# define GFX8_VS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21 -# define GFX8_VS_URB_OUTPUT_LENGTH_SHIFT 16 -# define GFX8_VS_USER_CLIP_DISTANCE_SHIFT 8 - -#define _3DSTATE_GS 0x7811 /* GFX6+ */ -/* DW2 */ -# define GFX6_GS_SPF_MODE (1 << 31) -# define GFX6_GS_VECTOR_MASK_ENABLE (1 << 30) -# define GFX6_GS_SAMPLER_COUNT_SHIFT 27 -# define GFX6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -# define GFX6_GS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -# define GFX6_GS_FLOATING_POINT_MODE_ALT (1 << 16) -# define HSW_GS_UAV_ACCESS_ENABLE (1 << 12) -/* DW4 */ -# define GFX7_GS_OUTPUT_VERTEX_SIZE_SHIFT 23 -# define GFX7_GS_OUTPUT_TOPOLOGY_SHIFT 17 -# define GFX6_GS_URB_READ_LENGTH_SHIFT 11 -# define GFX7_GS_INCLUDE_VERTEX_HANDLES (1 << 10) -# define GFX6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4 -# define GFX6_GS_DISPATCH_START_GRF_SHIFT 0 -/* DW5 */ -# define GFX6_GS_MAX_THREADS_SHIFT 25 -# define HSW_GS_MAX_THREADS_SHIFT 24 -# define IVB_GS_CONTROL_DATA_FORMAT_SHIFT 24 -# define GFX7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT 0 -# define GFX7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1 -# define GFX7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT 20 -# define GFX7_GS_INSTANCE_CONTROL_SHIFT 15 -# define GFX7_GS_DISPATCH_MODE_SHIFT 11 -# define GFX7_GS_DISPATCH_MODE_MASK INTEL_MASK(12, 11) -# define GFX6_GS_STATISTICS_ENABLE (1 << 10) -# define GFX6_GS_SO_STATISTICS_ENABLE (1 << 9) -# define GFX6_GS_RENDERING_ENABLE (1 << 8) -# define GFX7_GS_INCLUDE_PRIMITIVE_ID (1 << 4) -# define GFX7_GS_REORDER_TRAILING (1 << 2) -# define GFX7_GS_ENABLE (1 << 0) -/* DW6 */ -# define HSW_GS_CONTROL_DATA_FORMAT_SHIFT 31 -# define GFX6_GS_REORDER (1 << 30) -# define GFX6_GS_DISCARD_ADJACENCY (1 << 29) -# define GFX6_GS_SVBI_PAYLOAD_ENABLE (1 << 28) -# define GFX6_GS_SVBI_POSTINCREMENT_ENABLE (1 << 27) -# define GFX6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT 16 -# define GFX6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16) -# define GFX6_GS_ENABLE (1 << 15) - -/* Gfx8+ DW8 */ -# define GFX8_GS_STATIC_OUTPUT (1 << 30) -# define GFX8_GS_STATIC_VERTEX_COUNT_SHIFT 16 -# define GFX8_GS_STATIC_VERTEX_COUNT_MASK INTEL_MASK(26, 16) - -/* Gfx8+ DW9 */ -# define GFX8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21 -# define GFX8_GS_URB_OUTPUT_LENGTH_SHIFT 16 -# define GFX8_GS_USER_CLIP_DISTANCE_SHIFT 8 - -# define BRW_GS_EDGE_INDICATOR_0 (1 << 8) -# define BRW_GS_EDGE_INDICATOR_1 (1 << 9) - -#define _3DSTATE_HS 0x781B /* GFX7+ */ -/* DW1 */ -# define GFX7_HS_SAMPLER_COUNT_MASK INTEL_MASK(29, 27) -# define GFX7_HS_SAMPLER_COUNT_SHIFT 27 -# define GFX7_HS_BINDING_TABLE_ENTRY_COUNT_MASK INTEL_MASK(25, 18) -# define GFX7_HS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -# define GFX7_HS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -# define GFX7_HS_FLOATING_POINT_MODE_ALT (1 << 16) -# define GFX7_HS_MAX_THREADS_SHIFT 0 -/* DW2 */ -# define GFX7_HS_ENABLE (1 << 31) -# define GFX7_HS_STATISTICS_ENABLE (1 << 29) -# define GFX8_HS_MAX_THREADS_SHIFT 8 -# define GFX7_HS_INSTANCE_COUNT_MASK INTEL_MASK(3, 0) -# define GFX7_HS_INSTANCE_COUNT_SHIFT 0 -/* DW5 */ -# define GFX7_HS_SINGLE_PROGRAM_FLOW (1 << 27) -# define GFX7_HS_VECTOR_MASK_ENABLE (1 << 26) -# define HSW_HS_ACCESSES_UAV (1 << 25) -# define GFX7_HS_INCLUDE_VERTEX_HANDLES (1 << 24) -# define GFX7_HS_DISPATCH_START_GRF_MASK INTEL_MASK(23, 19) -# define GFX7_HS_DISPATCH_START_GRF_SHIFT 19 -# define 
GFX7_HS_URB_READ_LENGTH_MASK INTEL_MASK(16, 11) -# define GFX7_HS_URB_READ_LENGTH_SHIFT 11 -# define GFX7_HS_URB_ENTRY_READ_OFFSET_MASK INTEL_MASK(9, 4) -# define GFX7_HS_URB_ENTRY_READ_OFFSET_SHIFT 4 - -#define _3DSTATE_TE 0x781C /* GFX7+ */ -/* DW1 */ -# define GFX7_TE_PARTITIONING_SHIFT 12 -# define GFX7_TE_OUTPUT_TOPOLOGY_SHIFT 8 -# define GFX7_TE_DOMAIN_SHIFT 4 -//# define GFX7_TE_MODE_SW (1 << 1) -# define GFX7_TE_ENABLE (1 << 0) - -#define _3DSTATE_DS 0x781D /* GFX7+ */ -/* DW2 */ -# define GFX7_DS_SINGLE_DOMAIN_POINT_DISPATCH (1 << 31) -# define GFX7_DS_VECTOR_MASK_ENABLE (1 << 30) -# define GFX7_DS_SAMPLER_COUNT_MASK INTEL_MASK(29, 27) -# define GFX7_DS_SAMPLER_COUNT_SHIFT 27 -# define GFX7_DS_BINDING_TABLE_ENTRY_COUNT_MASK INTEL_MASK(25, 18) -# define GFX7_DS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -# define GFX7_DS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -# define GFX7_DS_FLOATING_POINT_MODE_ALT (1 << 16) -# define HSW_DS_ACCESSES_UAV (1 << 14) -/* DW4 */ -# define GFX7_DS_DISPATCH_START_GRF_MASK INTEL_MASK(24, 20) -# define GFX7_DS_DISPATCH_START_GRF_SHIFT 20 -# define GFX7_DS_URB_READ_LENGTH_MASK INTEL_MASK(17, 11) -# define GFX7_DS_URB_READ_LENGTH_SHIFT 11 -# define GFX7_DS_URB_ENTRY_READ_OFFSET_MASK INTEL_MASK(9, 4) -# define GFX7_DS_URB_ENTRY_READ_OFFSET_SHIFT 4 -/* DW5 */ -# define GFX7_DS_MAX_THREADS_SHIFT 25 -# define HSW_DS_MAX_THREADS_SHIFT 21 -# define GFX7_DS_STATISTICS_ENABLE (1 << 10) -# define GFX7_DS_SIMD8_DISPATCH_ENABLE (1 << 3) -# define GFX7_DS_COMPUTE_W_COORDINATE_ENABLE (1 << 2) -# define GFX7_DS_CACHE_DISABLE (1 << 1) -# define GFX7_DS_ENABLE (1 << 0) -/* Gfx8+ DW8 */ -# define GFX8_DS_URB_ENTRY_OUTPUT_OFFSET_MASK INTEL_MASK(26, 21) -# define GFX8_DS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21 -# define GFX8_DS_URB_OUTPUT_LENGTH_MASK INTEL_MASK(20, 16) -# define GFX8_DS_URB_OUTPUT_LENGTH_SHIFT 16 -# define GFX8_DS_USER_CLIP_DISTANCE_MASK INTEL_MASK(15, 8) -# define GFX8_DS_USER_CLIP_DISTANCE_SHIFT 8 -# define GFX8_DS_USER_CULL_DISTANCE_MASK INTEL_MASK(7, 0) -# define GFX8_DS_USER_CULL_DISTANCE_SHIFT 0 - - -#define _3DSTATE_CLIP 0x7812 /* GFX6+ */ -/* DW1 */ -# define GFX7_CLIP_WINDING_CW (0 << 20) -# define GFX7_CLIP_WINDING_CCW (1 << 20) -# define GFX7_CLIP_VERTEX_SUBPIXEL_PRECISION_8 (0 << 19) -# define GFX7_CLIP_VERTEX_SUBPIXEL_PRECISION_4 (1 << 19) -# define GFX7_CLIP_EARLY_CULL (1 << 18) -# define GFX8_CLIP_FORCE_USER_CLIP_DISTANCE_BITMASK (1 << 17) -# define GFX7_CLIP_CULLMODE_BOTH (0 << 16) -# define GFX7_CLIP_CULLMODE_NONE (1 << 16) -# define GFX7_CLIP_CULLMODE_FRONT (2 << 16) -# define GFX7_CLIP_CULLMODE_BACK (3 << 16) -# define GFX6_CLIP_STATISTICS_ENABLE (1 << 10) -/** - * Just does cheap culling based on the clip distance. Bits must be - * disjoint with USER_CLIP_CLIP_DISTANCE bits. - */ -# define GFX6_USER_CLIP_CULL_DISTANCES_SHIFT 0 -/* DW2 */ -# define GFX6_CLIP_ENABLE (1 << 31) -# define GFX6_CLIP_API_OGL (0 << 30) -# define GFX6_CLIP_API_D3D (1 << 30) -# define GFX6_CLIP_XY_TEST (1 << 28) -# define GFX6_CLIP_Z_TEST (1 << 27) -# define GFX6_CLIP_GB_TEST (1 << 26) -/** 8-bit field of which user clip distances to clip against.
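 * For example, to clip against user clip distances 0 and 3 only (an
 * illustrative sketch, not part of the original file, of how this shift
 * is consumed when assembling DW2 by hand):
 *
 *   dw2 |= ((1 << 0) | (1 << 3)) << GFX6_USER_CLIP_CLIP_DISTANCES_SHIFT;
 *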
*/ -# define GFX6_USER_CLIP_CLIP_DISTANCES_SHIFT 16 -# define GFX6_CLIP_MODE_NORMAL (0 << 13) -# define GFX6_CLIP_MODE_REJECT_ALL (3 << 13) -# define GFX6_CLIP_MODE_ACCEPT_ALL (4 << 13) -# define GFX6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9) -# define GFX6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE (1 << 8) -# define GFX6_CLIP_TRI_PROVOKE_SHIFT 4 -# define GFX6_CLIP_LINE_PROVOKE_SHIFT 2 -# define GFX6_CLIP_TRIFAN_PROVOKE_SHIFT 0 -/* DW3 */ -# define GFX6_CLIP_MIN_POINT_WIDTH_SHIFT 17 -# define GFX6_CLIP_MAX_POINT_WIDTH_SHIFT 6 -# define GFX6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5) -# define GFX6_CLIP_MAX_VP_INDEX_MASK INTEL_MASK(3, 0) - -#define _3DSTATE_SF 0x7813 /* GFX6+ */ -/* DW1 (for gfx6) */ -# define GFX6_SF_NUM_OUTPUTS_SHIFT 22 -# define GFX6_SF_SWIZZLE_ENABLE (1 << 21) -# define GFX6_SF_POINT_SPRITE_UPPERLEFT (0 << 20) -# define GFX6_SF_POINT_SPRITE_LOWERLEFT (1 << 20) -# define GFX9_SF_LINE_WIDTH_SHIFT 12 /* U11.7 */ -# define GFX6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 -# define GFX6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 -/* DW2 */ -# define GFX6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11) -# define GFX6_SF_STATISTICS_ENABLE (1 << 10) -# define GFX6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9) -# define GFX6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8) -# define GFX6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7) -# define GFX6_SF_FRONT_SOLID (0 << 5) -# define GFX6_SF_FRONT_WIREFRAME (1 << 5) -# define GFX6_SF_FRONT_POINT (2 << 5) -# define GFX6_SF_BACK_SOLID (0 << 3) -# define GFX6_SF_BACK_WIREFRAME (1 << 3) -# define GFX6_SF_BACK_POINT (2 << 3) -# define GFX6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1) -# define GFX6_SF_WINDING_CCW (1 << 0) -/* DW3 */ -# define GFX6_SF_LINE_AA_ENABLE (1 << 31) -# define GFX6_SF_CULL_BOTH (0 << 29) -# define GFX6_SF_CULL_NONE (1 << 29) -# define GFX6_SF_CULL_FRONT (2 << 29) -# define GFX6_SF_CULL_BACK (3 << 29) -# define GFX6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */ -# define GFX6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16) -# define GFX6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16) -# define GFX6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16) -# define GFX6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16) -# define GFX6_SF_SCISSOR_ENABLE (1 << 11) -# define GFX6_SF_MSRAST_OFF_PIXEL (0 << 8) -# define GFX6_SF_MSRAST_OFF_PATTERN (1 << 8) -# define GFX6_SF_MSRAST_ON_PIXEL (2 << 8) -# define GFX6_SF_MSRAST_ON_PATTERN (3 << 8) -/* DW4 */ -# define GFX6_SF_TRI_PROVOKE_SHIFT 29 -# define GFX6_SF_LINE_PROVOKE_SHIFT 27 -# define GFX6_SF_TRIFAN_PROVOKE_SHIFT 25 -# define GFX6_SF_LINE_AA_MODE_MANHATTAN (0 << 14) -# define GFX6_SF_LINE_AA_MODE_TRUE (1 << 14) -# define GFX6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12) -# define GFX6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12) -# define GFX6_SF_USE_STATE_POINT_WIDTH (1 << 11) -# define GFX6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */ -/* DW5: depth offset constant */ -/* DW6: depth offset scale */ -/* DW7: depth offset clamp */ -/* DW8 */ -# define ATTRIBUTE_1_OVERRIDE_W (1 << 31) -# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30) -# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29) -# define ATTRIBUTE_1_OVERRIDE_X (1 << 28) -# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25 -# define ATTRIBUTE_1_SWIZZLE_SHIFT 22 -# define ATTRIBUTE_1_SOURCE_SHIFT 16 -# define ATTRIBUTE_0_OVERRIDE_W (1 << 15) -# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14) -# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13) -# define ATTRIBUTE_0_OVERRIDE_X (1 << 12) -# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 -# define ATTRIBUTE_CONST_0000 0 -# define ATTRIBUTE_CONST_0001_FLOAT 1 -# define ATTRIBUTE_CONST_1111_FLOAT 2 -# define ATTRIBUTE_CONST_PRIM_ID 3 -# define ATTRIBUTE_0_SWIZZLE_SHIFT 6 -# define 
ATTRIBUTE_0_SOURCE_SHIFT 0 - -# define ATTRIBUTE_SWIZZLE_INPUTATTR 0 -# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1 -# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2 -# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3 -# define ATTRIBUTE_SWIZZLE_SHIFT 6 - -/* DW16: Point sprite texture coordinate enables */ -/* DW17: Constant interpolation enables */ -/* DW18: attr 0-7 wrap shortest enables */ -/* DW19: attr 8-16 wrap shortest enables */ - -/* On GFX7, many fields of 3DSTATE_SF were split out into a new command: - * 3DSTATE_SBE. The remaining fields live in different DWords, but retain - * the same bit-offset. The only new field: - */ -/* GFX7/DW1: */ -# define GFX7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT 12 -/* GFX7/DW2: */ -# define HSW_SF_LINE_STIPPLE_ENABLE (1 << 14) - -# define GFX8_SF_SMOOTH_POINT_ENABLE (1 << 13) - -#define _3DSTATE_SBE 0x781F /* GFX7+ */ -/* DW1 */ -# define GFX8_SBE_FORCE_URB_ENTRY_READ_LENGTH (1 << 29) -# define GFX8_SBE_FORCE_URB_ENTRY_READ_OFFSET (1 << 28) -# define GFX7_SBE_SWIZZLE_CONTROL_MODE (1 << 28) -# define GFX7_SBE_NUM_OUTPUTS_SHIFT 22 -# define GFX7_SBE_SWIZZLE_ENABLE (1 << 21) -# define GFX7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20) -# define GFX7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11 -# define GFX7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4 -# define GFX8_SBE_URB_ENTRY_READ_OFFSET_SHIFT 5 -/* DW2-9: Attribute setup (same as DW8-15 of gfx6 _3DSTATE_SF) */ -/* DW10: Point sprite texture coordinate enables */ -/* DW11: Constant interpolation enables */ -/* DW12: attr 0-7 wrap shortest enables */ -/* DW13: attr 8-16 wrap shortest enables */ - -/* DW4-5: Attribute active components (gfx9) */ -#define GFX9_SBE_ACTIVE_COMPONENT_NONE 0 -#define GFX9_SBE_ACTIVE_COMPONENT_XY 1 -#define GFX9_SBE_ACTIVE_COMPONENT_XYZ 2 -#define GFX9_SBE_ACTIVE_COMPONENT_XYZW 3 - -#define _3DSTATE_SBE_SWIZ 0x7851 /* GFX8+ */ - -#define _3DSTATE_RASTER 0x7850 /* GFX8+ */ -/* DW1 */ -# define GFX9_RASTER_VIEWPORT_Z_FAR_CLIP_TEST_ENABLE (1 << 26) -# define GFX9_RASTER_CONSERVATIVE_RASTERIZATION_ENABLE (1 << 24) -# define GFX8_RASTER_FRONT_WINDING_CCW (1 << 21) -# define GFX8_RASTER_CULL_BOTH (0 << 16) -# define GFX8_RASTER_CULL_NONE (1 << 16) -# define GFX8_RASTER_CULL_FRONT (2 << 16) -# define GFX8_RASTER_CULL_BACK (3 << 16) -# define GFX8_RASTER_SMOOTH_POINT_ENABLE (1 << 13) -# define GFX8_RASTER_API_MULTISAMPLE_ENABLE (1 << 12) -# define GFX8_RASTER_LINE_AA_ENABLE (1 << 2) -# define GFX8_RASTER_SCISSOR_ENABLE (1 << 1) -# define GFX8_RASTER_VIEWPORT_Z_CLIP_TEST_ENABLE (1 << 0) -# define GFX9_RASTER_VIEWPORT_Z_NEAR_CLIP_TEST_ENABLE (1 << 0) - -/* Gfx8 BLEND_STATE */ -/* DW0 */ -#define GFX8_BLEND_ALPHA_TO_COVERAGE_ENABLE (1 << 31) -#define GFX8_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE (1 << 30) -#define GFX8_BLEND_ALPHA_TO_ONE_ENABLE (1 << 29) -#define GFX8_BLEND_ALPHA_TO_COVERAGE_DITHER_ENABLE (1 << 28) -#define GFX8_BLEND_ALPHA_TEST_ENABLE (1 << 27) -#define GFX8_BLEND_ALPHA_TEST_FUNCTION_MASK INTEL_MASK(26, 24) -#define GFX8_BLEND_ALPHA_TEST_FUNCTION_SHIFT 24 -#define GFX8_BLEND_COLOR_DITHER_ENABLE (1 << 23) -#define GFX8_BLEND_X_DITHER_OFFSET_MASK INTEL_MASK(22, 21) -#define GFX8_BLEND_X_DITHER_OFFSET_SHIFT 21 -#define GFX8_BLEND_Y_DITHER_OFFSET_MASK INTEL_MASK(20, 19) -#define GFX8_BLEND_Y_DITHER_OFFSET_SHIFT 19 -/* DW1 + 2n */ -#define GFX8_BLEND_COLOR_BUFFER_BLEND_ENABLE (1 << 31) -#define GFX8_BLEND_SRC_BLEND_FACTOR_MASK INTEL_MASK(30, 26) -#define GFX8_BLEND_SRC_BLEND_FACTOR_SHIFT 26 -#define GFX8_BLEND_DST_BLEND_FACTOR_MASK INTEL_MASK(25, 21) -#define GFX8_BLEND_DST_BLEND_FACTOR_SHIFT 21 -#define 
GFX8_BLEND_COLOR_BLEND_FUNCTION_MASK INTEL_MASK(20, 18) -#define GFX8_BLEND_COLOR_BLEND_FUNCTION_SHIFT 18 -#define GFX8_BLEND_SRC_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(17, 13) -#define GFX8_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT 13 -#define GFX8_BLEND_DST_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(12, 8) -#define GFX8_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT 8 -#define GFX8_BLEND_ALPHA_BLEND_FUNCTION_MASK INTEL_MASK(7, 5) -#define GFX8_BLEND_ALPHA_BLEND_FUNCTION_SHIFT 5 -#define GFX8_BLEND_WRITE_DISABLE_ALPHA (1 << 3) -#define GFX8_BLEND_WRITE_DISABLE_RED (1 << 2) -#define GFX8_BLEND_WRITE_DISABLE_GREEN (1 << 1) -#define GFX8_BLEND_WRITE_DISABLE_BLUE (1 << 0) -/* DW1 + 2n + 1 */ -#define GFX8_BLEND_LOGIC_OP_ENABLE (1 << 31) -#define GFX8_BLEND_LOGIC_OP_FUNCTION_MASK INTEL_MASK(30, 27) -#define GFX8_BLEND_LOGIC_OP_FUNCTION_SHIFT 27 -#define GFX8_BLEND_PRE_BLEND_SRC_ONLY_CLAMP_ENABLE (1 << 4) -#define GFX8_BLEND_COLOR_CLAMP_RANGE_RTFORMAT (2 << 2) -#define GFX8_BLEND_PRE_BLEND_COLOR_CLAMP_ENABLE (1 << 1) -#define GFX8_BLEND_POST_BLEND_COLOR_CLAMP_ENABLE (1 << 0) - -#define _3DSTATE_WM_HZ_OP 0x7852 /* GFX8+ */ -/* DW1 */ -# define GFX8_WM_HZ_STENCIL_CLEAR (1 << 31) -# define GFX8_WM_HZ_DEPTH_CLEAR (1 << 30) -# define GFX8_WM_HZ_DEPTH_RESOLVE (1 << 28) -# define GFX8_WM_HZ_HIZ_RESOLVE (1 << 27) -# define GFX8_WM_HZ_PIXEL_OFFSET_ENABLE (1 << 26) -# define GFX8_WM_HZ_FULL_SURFACE_DEPTH_CLEAR (1 << 25) -# define GFX8_WM_HZ_STENCIL_CLEAR_VALUE_MASK INTEL_MASK(23, 16) -# define GFX8_WM_HZ_STENCIL_CLEAR_VALUE_SHIFT 16 -# define GFX8_WM_HZ_NUM_SAMPLES_MASK INTEL_MASK(15, 13) -# define GFX8_WM_HZ_NUM_SAMPLES_SHIFT 13 -/* DW2 */ -# define GFX8_WM_HZ_CLEAR_RECTANGLE_Y_MIN_MASK INTEL_MASK(31, 16) -# define GFX8_WM_HZ_CLEAR_RECTANGLE_Y_MIN_SHIFT 16 -# define GFX8_WM_HZ_CLEAR_RECTANGLE_X_MIN_MASK INTEL_MASK(15, 0) -# define GFX8_WM_HZ_CLEAR_RECTANGLE_X_MIN_SHIFT 0 -/* DW3 */ -# define GFX8_WM_HZ_CLEAR_RECTANGLE_Y_MAX_MASK INTEL_MASK(31, 16) -# define GFX8_WM_HZ_CLEAR_RECTANGLE_Y_MAX_SHIFT 16 -# define GFX8_WM_HZ_CLEAR_RECTANGLE_X_MAX_MASK INTEL_MASK(15, 0) -# define GFX8_WM_HZ_CLEAR_RECTANGLE_X_MAX_SHIFT 0 -/* DW4 */ -# define GFX8_WM_HZ_SAMPLE_MASK_MASK INTEL_MASK(15, 0) -# define GFX8_WM_HZ_SAMPLE_MASK_SHIFT 0 - - -#define _3DSTATE_PS_BLEND 0x784D /* GFX8+ */ -/* DW1 */ -# define GFX8_PS_BLEND_ALPHA_TO_COVERAGE_ENABLE (1 << 31) -# define GFX8_PS_BLEND_HAS_WRITEABLE_RT (1 << 30) -# define GFX8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE (1 << 29) -# define GFX8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(28, 24) -# define GFX8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT 24 -# define GFX8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_MASK INTEL_MASK(23, 19) -# define GFX8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT 19 -# define GFX8_PS_BLEND_SRC_BLEND_FACTOR_MASK INTEL_MASK(18, 14) -# define GFX8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT 14 -# define GFX8_PS_BLEND_DST_BLEND_FACTOR_MASK INTEL_MASK(13, 9) -# define GFX8_PS_BLEND_DST_BLEND_FACTOR_SHIFT 9 -# define GFX8_PS_BLEND_ALPHA_TEST_ENABLE (1 << 8) -# define GFX8_PS_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE (1 << 7) - -#define _3DSTATE_WM_DEPTH_STENCIL 0x784E /* GFX8+ */ -/* DW1 */ -# define GFX8_WM_DS_STENCIL_FAIL_OP_SHIFT 29 -# define GFX8_WM_DS_Z_FAIL_OP_SHIFT 26 -# define GFX8_WM_DS_Z_PASS_OP_SHIFT 23 -# define GFX8_WM_DS_BF_STENCIL_FUNC_SHIFT 20 -# define GFX8_WM_DS_BF_STENCIL_FAIL_OP_SHIFT 17 -# define GFX8_WM_DS_BF_Z_FAIL_OP_SHIFT 14 -# define GFX8_WM_DS_BF_Z_PASS_OP_SHIFT 11 -# define GFX8_WM_DS_STENCIL_FUNC_SHIFT 8 -# define GFX8_WM_DS_DEPTH_FUNC_SHIFT 5 -# define GFX8_WM_DS_DOUBLE_SIDED_STENCIL_ENABLE (1 << 
4) -# define GFX8_WM_DS_STENCIL_TEST_ENABLE (1 << 3) -# define GFX8_WM_DS_STENCIL_BUFFER_WRITE_ENABLE (1 << 2) -# define GFX8_WM_DS_DEPTH_TEST_ENABLE (1 << 1) -# define GFX8_WM_DS_DEPTH_BUFFER_WRITE_ENABLE (1 << 0) -/* DW2 */ -# define GFX8_WM_DS_STENCIL_TEST_MASK_MASK INTEL_MASK(31, 24) -# define GFX8_WM_DS_STENCIL_TEST_MASK_SHIFT 24 -# define GFX8_WM_DS_STENCIL_WRITE_MASK_MASK INTEL_MASK(23, 16) -# define GFX8_WM_DS_STENCIL_WRITE_MASK_SHIFT 16 -# define GFX8_WM_DS_BF_STENCIL_TEST_MASK_MASK INTEL_MASK(15, 8) -# define GFX8_WM_DS_BF_STENCIL_TEST_MASK_SHIFT 8 -# define GFX8_WM_DS_BF_STENCIL_WRITE_MASK_MASK INTEL_MASK(7, 0) -# define GFX8_WM_DS_BF_STENCIL_WRITE_MASK_SHIFT 0 -/* DW3 */ -# define GFX9_WM_DS_STENCIL_REF_MASK INTEL_MASK(15, 8) -# define GFX9_WM_DS_STENCIL_REF_SHIFT 8 -# define GFX9_WM_DS_BF_STENCIL_REF_MASK INTEL_MASK(7, 0) -# define GFX9_WM_DS_BF_STENCIL_REF_SHIFT 0 - -enum brw_pixel_shader_coverage_mask_mode { - BRW_PSICMS_OFF = 0, /* PS does not use input coverage masks. */ - BRW_PSICMS_NORMAL = 1, /* Input Coverage masks based on outer conservatism - * and factors in SAMPLE_MASK. If Pixel is - * conservatively covered, all samples are enabled. - */ - - BRW_PSICMS_INNER = 2, /* Input Coverage masks based on inner conservatism - * and factors in SAMPLE_MASK. If Pixel is - * conservatively *FULLY* covered, all samples are - * enabled. - */ - BRW_PCICMS_DEPTH = 3, -}; - -#define _3DSTATE_PS_EXTRA 0x784F /* GFX8+ */ -/* DW1 */ -# define GFX8_PSX_PIXEL_SHADER_VALID (1 << 31) -# define GFX8_PSX_PIXEL_SHADER_NO_RT_WRITE (1 << 30) -# define GFX8_PSX_OMASK_TO_RENDER_TARGET (1 << 29) -# define GFX8_PSX_KILL_ENABLE (1 << 28) -# define GFX8_PSX_COMPUTED_DEPTH_MODE_SHIFT 26 -# define GFX8_PSX_FORCE_COMPUTED_DEPTH (1 << 25) -# define GFX8_PSX_USES_SOURCE_DEPTH (1 << 24) -# define GFX8_PSX_USES_SOURCE_W (1 << 23) -# define GFX8_PSX_ATTRIBUTE_ENABLE (1 << 8) -# define GFX8_PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE (1 << 7) -# define GFX8_PSX_SHADER_IS_PER_SAMPLE (1 << 6) -# define GFX9_PSX_SHADER_COMPUTES_STENCIL (1 << 5) -# define GFX9_PSX_SHADER_PULLS_BARY (1 << 3) -# define GFX8_PSX_SHADER_HAS_UAV (1 << 2) -# define GFX8_PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1) -# define GFX9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT 0 - -#define _3DSTATE_WM 0x7814 /* GFX6+ */ -/* DW1: kernel pointer */ -/* DW2 */ -# define GFX6_WM_SPF_MODE (1 << 31) -# define GFX6_WM_VECTOR_MASK_ENABLE (1 << 30) -# define GFX6_WM_SAMPLER_COUNT_SHIFT 27 -# define GFX6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -# define GFX6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -# define GFX6_WM_FLOATING_POINT_MODE_ALT (1 << 16) -/* DW3: scratch space */ -/* DW4 */ -# define GFX6_WM_STATISTICS_ENABLE (1 << 31) -# define GFX6_WM_DEPTH_CLEAR (1 << 30) -# define GFX6_WM_DEPTH_RESOLVE (1 << 28) -# define GFX6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) -# define GFX6_WM_DISPATCH_START_GRF_SHIFT_0 16 -# define GFX6_WM_DISPATCH_START_GRF_SHIFT_1 8 -# define GFX6_WM_DISPATCH_START_GRF_SHIFT_2 0 -/* DW5 */ -# define GFX6_WM_MAX_THREADS_SHIFT 25 -# define GFX6_WM_KILL_ENABLE (1 << 22) -# define GFX6_WM_COMPUTED_DEPTH (1 << 21) -# define GFX6_WM_USES_SOURCE_DEPTH (1 << 20) -# define GFX6_WM_DISPATCH_ENABLE (1 << 19) -# define GFX6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16) -# define GFX6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16) -# define GFX6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16) -# define GFX6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16) -# define GFX6_WM_LINE_AA_WIDTH_0_5 (0 << 14) -# define GFX6_WM_LINE_AA_WIDTH_1_0 (1 << 14) -# define GFX6_WM_LINE_AA_WIDTH_2_0 (2 
<< 14) -# define GFX6_WM_LINE_AA_WIDTH_4_0 (3 << 14) -# define GFX6_WM_POLYGON_STIPPLE_ENABLE (1 << 13) -# define GFX6_WM_LINE_STIPPLE_ENABLE (1 << 11) -# define GFX6_WM_OMASK_TO_RENDER_TARGET (1 << 9) -# define GFX6_WM_USES_SOURCE_W (1 << 8) -# define GFX6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7) -# define GFX6_WM_32_DISPATCH_ENABLE (1 << 2) -# define GFX6_WM_16_DISPATCH_ENABLE (1 << 1) -# define GFX6_WM_8_DISPATCH_ENABLE (1 << 0) -/* DW6 */ -# define GFX6_WM_NUM_SF_OUTPUTS_SHIFT 20 -# define GFX6_WM_POSOFFSET_NONE (0 << 18) -# define GFX6_WM_POSOFFSET_CENTROID (2 << 18) -# define GFX6_WM_POSOFFSET_SAMPLE (3 << 18) -# define GFX6_WM_POSITION_ZW_PIXEL (0 << 16) -# define GFX6_WM_POSITION_ZW_CENTROID (2 << 16) -# define GFX6_WM_POSITION_ZW_SAMPLE (3 << 16) -# define GFX6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) -# define GFX6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) -# define GFX6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) -# define GFX6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) -# define GFX6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) -# define GFX6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) -# define GFX6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 10 -# define GFX6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9) -# define GFX6_WM_MSRAST_OFF_PIXEL (0 << 1) -# define GFX6_WM_MSRAST_OFF_PATTERN (1 << 1) -# define GFX6_WM_MSRAST_ON_PIXEL (2 << 1) -# define GFX6_WM_MSRAST_ON_PATTERN (3 << 1) -# define GFX6_WM_MSDISPMODE_PERSAMPLE (0 << 0) -# define GFX6_WM_MSDISPMODE_PERPIXEL (1 << 0) -/* DW7: kernel 1 pointer */ -/* DW8: kernel 2 pointer */ - -#define _3DSTATE_CONSTANT_VS 0x7815 /* GFX6+ */ -#define _3DSTATE_CONSTANT_GS 0x7816 /* GFX6+ */ -#define _3DSTATE_CONSTANT_PS 0x7817 /* GFX6+ */ -# define GFX6_CONSTANT_BUFFER_3_ENABLE (1 << 15) -# define GFX6_CONSTANT_BUFFER_2_ENABLE (1 << 14) -# define GFX6_CONSTANT_BUFFER_1_ENABLE (1 << 13) -# define GFX6_CONSTANT_BUFFER_0_ENABLE (1 << 12) - -#define _3DSTATE_CONSTANT_HS 0x7819 /* GFX7+ */ -#define _3DSTATE_CONSTANT_DS 0x781A /* GFX7+ */ - -#define _3DSTATE_STREAMOUT 0x781e /* GFX7+ */ -/* DW1 */ -# define SO_FUNCTION_ENABLE (1 << 31) -# define SO_RENDERING_DISABLE (1 << 30) -/* This selects which incoming rendering stream goes down the pipeline. The - * rendering stream is 0 if not defined by special cases in the GS state. - */ -# define SO_RENDER_STREAM_SELECT_SHIFT 27 -# define SO_RENDER_STREAM_SELECT_MASK INTEL_MASK(28, 27) -/* Controls reordering of TRISTRIP_* elements in stream output (not rendering). 
- */ -# define SO_REORDER_TRAILING (1 << 26) -/* Controls SO_NUM_PRIMS_WRITTEN_* and SO_PRIM_STORAGE_* */ -# define SO_STATISTICS_ENABLE (1 << 25) -# define SO_BUFFER_ENABLE(n) (1 << (8 + (n))) -/* DW2 */ -# define SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT 29 -# define SO_STREAM_3_VERTEX_READ_OFFSET_MASK INTEL_MASK(29, 29) -# define SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT 24 -# define SO_STREAM_3_VERTEX_READ_LENGTH_MASK INTEL_MASK(28, 24) -# define SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT 21 -# define SO_STREAM_2_VERTEX_READ_OFFSET_MASK INTEL_MASK(21, 21) -# define SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT 16 -# define SO_STREAM_2_VERTEX_READ_LENGTH_MASK INTEL_MASK(20, 16) -# define SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT 13 -# define SO_STREAM_1_VERTEX_READ_OFFSET_MASK INTEL_MASK(13, 13) -# define SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT 8 -# define SO_STREAM_1_VERTEX_READ_LENGTH_MASK INTEL_MASK(12, 8) -# define SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT 5 -# define SO_STREAM_0_VERTEX_READ_OFFSET_MASK INTEL_MASK(5, 5) -# define SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT 0 -# define SO_STREAM_0_VERTEX_READ_LENGTH_MASK INTEL_MASK(4, 0) - -/* 3DSTATE_WM for Gfx7 */ -/* DW1 */ -# define GFX7_WM_STATISTICS_ENABLE (1 << 31) -# define GFX7_WM_DEPTH_CLEAR (1 << 30) -# define GFX7_WM_DISPATCH_ENABLE (1 << 29) -# define GFX7_WM_DEPTH_RESOLVE (1 << 28) -# define GFX7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) -# define GFX7_WM_KILL_ENABLE (1 << 25) -# define GFX7_WM_COMPUTED_DEPTH_MODE_SHIFT 23 -# define GFX7_WM_USES_SOURCE_DEPTH (1 << 20) -# define GFX7_WM_EARLY_DS_CONTROL_NORMAL (0 << 21) -# define GFX7_WM_EARLY_DS_CONTROL_PSEXEC (1 << 21) -# define GFX7_WM_EARLY_DS_CONTROL_PREPS (2 << 21) -# define GFX7_WM_USES_SOURCE_W (1 << 19) -# define GFX7_WM_POSITION_ZW_PIXEL (0 << 17) -# define GFX7_WM_POSITION_ZW_CENTROID (2 << 17) -# define GFX7_WM_POSITION_ZW_SAMPLE (3 << 17) -# define GFX7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 11 -# define GFX7_WM_USES_INPUT_COVERAGE_MASK (1 << 10) -# define GFX7_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8) -# define GFX7_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8) -# define GFX7_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8) -# define GFX7_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8) -# define GFX7_WM_LINE_AA_WIDTH_0_5 (0 << 6) -# define GFX7_WM_LINE_AA_WIDTH_1_0 (1 << 6) -# define GFX7_WM_LINE_AA_WIDTH_2_0 (2 << 6) -# define GFX7_WM_LINE_AA_WIDTH_4_0 (3 << 6) -# define GFX7_WM_POLYGON_STIPPLE_ENABLE (1 << 4) -# define GFX7_WM_LINE_STIPPLE_ENABLE (1 << 3) -# define GFX7_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2) -# define GFX7_WM_MSRAST_OFF_PIXEL (0 << 0) -# define GFX7_WM_MSRAST_OFF_PATTERN (1 << 0) -# define GFX7_WM_MSRAST_ON_PIXEL (2 << 0) -# define GFX7_WM_MSRAST_ON_PATTERN (3 << 0) -/* DW2 */ -# define GFX7_WM_MSDISPMODE_PERSAMPLE (0 << 31) -# define GFX7_WM_MSDISPMODE_PERPIXEL (1 << 31) -# define HSW_WM_UAV_ONLY (1 << 30) - -#define _3DSTATE_PS 0x7820 /* GFX7+ */ -/* DW1: kernel pointer */ -/* DW2 */ -# define GFX7_PS_SPF_MODE (1 << 31) -# define GFX7_PS_VECTOR_MASK_ENABLE (1 << 30) -# define GFX7_PS_SAMPLER_COUNT_SHIFT 27 -# define GFX7_PS_SAMPLER_COUNT_MASK INTEL_MASK(29, 27) -# define GFX7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 -# define GFX7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) -# define GFX7_PS_FLOATING_POINT_MODE_ALT (1 << 16) -/* DW3: scratch space */ -/* DW4 */ -# define IVB_PS_MAX_THREADS_SHIFT 24 -# define HSW_PS_MAX_THREADS_SHIFT 23 -# define HSW_PS_SAMPLE_MASK_SHIFT 12 -# define HSW_PS_SAMPLE_MASK_MASK INTEL_MASK(19, 12) -# define GFX7_PS_PUSH_CONSTANT_ENABLE (1 << 11) -# define GFX7_PS_ATTRIBUTE_ENABLE (1 << 10) 
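As an aside for readers of this header: the paired _SHIFT/_MASK defines above are consumed through the INTEL_MASK/SET_FIELD/GET_FIELD helper macros declared near the top of brw_defines.h. The sketch below shows that packing pattern for DW4 of 3DSTATE_PS on Haswell; it is illustrative only, not part of the removed file, and the helper definitions are reproduced from memory, so treat them as assumptions.

#include <stdint.h>

/* Assumed helpers mirroring brw_defines.h: build a contiguous bitmask,
 * shift a value into a field (clamped to the field's mask), and extract
 * it again. */
#define INTEL_MASK(high, low) (((1u << ((high) - (low) + 1)) - 1) << (low))
#define SET_FIELD(value, field) (((value) << field##_SHIFT) & field##_MASK)
#define GET_FIELD(word, field) (((word) & field##_MASK) >> field##_SHIFT)

/* Field definitions as given above in this header. */
#define HSW_PS_SAMPLE_MASK_SHIFT 12
#define HSW_PS_SAMPLE_MASK_MASK INTEL_MASK(19, 12)
#define GFX7_PS_PUSH_CONSTANT_ENABLE (1 << 11)

/* Pack DW4 of 3DSTATE_PS: an 8-bit sample mask in bits 19:12 plus a
 * single-bit enable in bit 11. */
uint32_t
pack_ps_dw4(uint8_t sample_mask)
{
   uint32_t dw4 = 0;
   dw4 |= SET_FIELD(sample_mask, HSW_PS_SAMPLE_MASK); /* bits 19:12 */
   dw4 |= GFX7_PS_PUSH_CONSTANT_ENABLE;               /* bit 11 */
   /* GET_FIELD(dw4, HSW_PS_SAMPLE_MASK) recovers sample_mask. */
   return dw4;
}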
-# define GFX7_PS_OMASK_TO_RENDER_TARGET (1 << 9) -# define GFX7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8) -# define GFX7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7) -# define GFX7_PS_RENDER_TARGET_RESOLVE_ENABLE (1 << 6) -# define GFX9_PS_RENDER_TARGET_RESOLVE_FULL (3 << 6) -# define HSW_PS_UAV_ACCESS_ENABLE (1 << 5) -# define GFX7_PS_POSOFFSET_NONE (0 << 3) -# define GFX7_PS_POSOFFSET_CENTROID (2 << 3) -# define GFX7_PS_POSOFFSET_SAMPLE (3 << 3) -# define GFX7_PS_32_DISPATCH_ENABLE (1 << 2) -# define GFX7_PS_16_DISPATCH_ENABLE (1 << 1) -# define GFX7_PS_8_DISPATCH_ENABLE (1 << 0) -/* DW5 */ -# define GFX7_PS_DISPATCH_START_GRF_SHIFT_0 16 -# define GFX7_PS_DISPATCH_START_GRF_SHIFT_1 8 -# define GFX7_PS_DISPATCH_START_GRF_SHIFT_2 0 -/* DW6: kernel 1 pointer */ -/* DW7: kernel 2 pointer */ - -#define _3DSTATE_SAMPLE_MASK 0x7818 /* GFX6+ */ - -#define _3DSTATE_DRAWING_RECTANGLE 0x7900 -#define _3DSTATE_BLEND_CONSTANT_COLOR 0x7901 -#define _3DSTATE_CHROMA_KEY 0x7904 -#define _3DSTATE_DEPTH_BUFFER 0x7905 /* GFX4-6 */ -#define _3DSTATE_POLY_STIPPLE_OFFSET 0x7906 -#define _3DSTATE_POLY_STIPPLE_PATTERN 0x7907 -#define _3DSTATE_LINE_STIPPLE_PATTERN 0x7908 -#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 -#define _3DSTATE_AA_LINE_PARAMETERS 0x790a /* G45+ */ - -#define _3DSTATE_GS_SVB_INDEX 0x790b /* CTG+ */ -/* DW1 */ -# define SVB_INDEX_SHIFT 29 -# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */ -/* DW2: SVB index */ -/* DW3: SVB maximum index */ - -#define _3DSTATE_MULTISAMPLE 0x790d /* GFX6+ */ -#define GFX8_3DSTATE_MULTISAMPLE 0x780d /* GFX8+ */ -/* DW1 */ -# define MS_PIXEL_LOCATION_CENTER (0 << 4) -# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4) -# define MS_NUMSAMPLES_1 (0 << 1) -# define MS_NUMSAMPLES_2 (1 << 1) -# define MS_NUMSAMPLES_4 (2 << 1) -# define MS_NUMSAMPLES_8 (3 << 1) -# define MS_NUMSAMPLES_16 (4 << 1) - -#define _3DSTATE_SAMPLE_PATTERN 0x791c - -#define _3DSTATE_STENCIL_BUFFER 0x790e /* ILK, SNB */ -#define _3DSTATE_HIER_DEPTH_BUFFER 0x790f /* ILK, SNB */ - -#define GFX7_3DSTATE_CLEAR_PARAMS 0x7804 -#define GFX7_3DSTATE_DEPTH_BUFFER 0x7805 -#define GFX7_3DSTATE_STENCIL_BUFFER 0x7806 -# define HSW_STENCIL_ENABLED (1 << 31) -#define GFX7_3DSTATE_HIER_DEPTH_BUFFER 0x7807 - -#define _3DSTATE_CLEAR_PARAMS 0x7910 /* ILK, SNB */ -# define GFX5_DEPTH_CLEAR_VALID (1 << 15) -/* DW1: depth clear value */ -/* DW2 */ -# define GFX7_DEPTH_CLEAR_VALID (1 << 0) - -#define _3DSTATE_SO_DECL_LIST 0x7917 /* GFX7+ */ -/* DW1 */ -# define SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT 12 -# define SO_STREAM_TO_BUFFER_SELECTS_3_MASK INTEL_MASK(15, 12) -# define SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT 8 -# define SO_STREAM_TO_BUFFER_SELECTS_2_MASK INTEL_MASK(11, 8) -# define SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT 4 -# define SO_STREAM_TO_BUFFER_SELECTS_1_MASK INTEL_MASK(7, 4) -# define SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT 0 -# define SO_STREAM_TO_BUFFER_SELECTS_0_MASK INTEL_MASK(3, 0) -/* DW2 */ -# define SO_NUM_ENTRIES_3_SHIFT 24 -# define SO_NUM_ENTRIES_3_MASK INTEL_MASK(31, 24) -# define SO_NUM_ENTRIES_2_SHIFT 16 -# define SO_NUM_ENTRIES_2_MASK INTEL_MASK(23, 16) -# define SO_NUM_ENTRIES_1_SHIFT 8 -# define SO_NUM_ENTRIES_1_MASK INTEL_MASK(15, 8) -# define SO_NUM_ENTRIES_0_SHIFT 0 -# define SO_NUM_ENTRIES_0_MASK INTEL_MASK(7, 0) - -/* SO_DECL DW0 */ -# define SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT 12 -# define SO_DECL_OUTPUT_BUFFER_SLOT_MASK INTEL_MASK(13, 12) -# define SO_DECL_HOLE_FLAG (1 << 11) -# define SO_DECL_REGISTER_INDEX_SHIFT 4 -# define SO_DECL_REGISTER_INDEX_MASK INTEL_MASK(9, 4) -# define 
SO_DECL_COMPONENT_MASK_SHIFT 0 -# define SO_DECL_COMPONENT_MASK_MASK INTEL_MASK(3, 0) - -#define _3DSTATE_SO_BUFFER 0x7918 /* GFX7+ */ -/* DW1 */ -# define GFX8_SO_BUFFER_ENABLE (1 << 31) -# define SO_BUFFER_INDEX_SHIFT 29 -# define SO_BUFFER_INDEX_MASK INTEL_MASK(30, 29) -# define GFX8_SO_BUFFER_OFFSET_WRITE_ENABLE (1 << 21) -# define GFX8_SO_BUFFER_OFFSET_ADDRESS_ENABLE (1 << 20) -# define SO_BUFFER_PITCH_SHIFT 0 -# define SO_BUFFER_PITCH_MASK INTEL_MASK(11, 0) -/* DW2: start address */ -/* DW3: end address. */ - -#define _3DSTATE_3D_MODE 0x791e -# define SLICE_HASHING_TABLE_ENABLE (1 << 6) -# define SLICE_HASHING_TABLE_ENABLE_MASK REG_MASK(1 << 6) - -#define _3DSTATE_SLICE_TABLE_STATE_POINTERS 0x7920 - -#define CMD_MI_FLUSH 0x0200 - -# define BLT_X_SHIFT 0 -# define BLT_X_MASK INTEL_MASK(15, 0) -# define BLT_Y_SHIFT 16 -# define BLT_Y_MASK INTEL_MASK(31, 16) - -#define GFX5_MI_REPORT_PERF_COUNT ((0x26 << 23) | (3 - 2)) -/* DW0 */ -# define GFX5_MI_COUNTER_SET_0 (0 << 6) -# define GFX5_MI_COUNTER_SET_1 (1 << 6) -/* DW1 */ -# define MI_COUNTER_ADDRESS_GTT (1 << 0) -/* DW2: a user-defined report ID (written to the buffer but can be anything) */ - -#define GFX6_MI_REPORT_PERF_COUNT ((0x28 << 23) | (3 - 2)) - -#define GFX8_MI_REPORT_PERF_COUNT ((0x28 << 23) | (4 - 2)) - -/* Maximum number of entries that can be addressed using a binding table - * pointer of type SURFTYPE_BUFFER - */ -#define BRW_MAX_NUM_BUFFER_ENTRIES (1 << 27) - -#define MEDIA_VFE_STATE 0x7000 -/* GFX7 DW2, GFX8+ DW3 */ -# define MEDIA_VFE_STATE_MAX_THREADS_SHIFT 16 -# define MEDIA_VFE_STATE_MAX_THREADS_MASK INTEL_MASK(31, 16) -# define MEDIA_VFE_STATE_URB_ENTRIES_SHIFT 8 -# define MEDIA_VFE_STATE_URB_ENTRIES_MASK INTEL_MASK(15, 8) -# define MEDIA_VFE_STATE_RESET_GTW_TIMER_SHIFT 7 -# define MEDIA_VFE_STATE_RESET_GTW_TIMER_MASK INTEL_MASK(7, 7) -# define MEDIA_VFE_STATE_BYPASS_GTW_SHIFT 6 -# define MEDIA_VFE_STATE_BYPASS_GTW_MASK INTEL_MASK(6, 6) -# define GFX7_MEDIA_VFE_STATE_GPGPU_MODE_SHIFT 2 -# define GFX7_MEDIA_VFE_STATE_GPGPU_MODE_MASK INTEL_MASK(2, 2) -/* GFX7 DW4, GFX8+ DW5 */ -# define MEDIA_VFE_STATE_URB_ALLOC_SHIFT 16 -# define MEDIA_VFE_STATE_URB_ALLOC_MASK INTEL_MASK(31, 16) -# define MEDIA_VFE_STATE_CURBE_ALLOC_SHIFT 0 -# define MEDIA_VFE_STATE_CURBE_ALLOC_MASK INTEL_MASK(15, 0) - -#define MEDIA_CURBE_LOAD 0x7001 -#define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002 -/* GFX7 DW4, GFX8+ DW5 */ -# define MEDIA_CURBE_READ_LENGTH_SHIFT 16 -# define MEDIA_CURBE_READ_LENGTH_MASK INTEL_MASK(31, 16) -# define MEDIA_CURBE_READ_OFFSET_SHIFT 0 -# define MEDIA_CURBE_READ_OFFSET_MASK INTEL_MASK(15, 0) -/* GFX7 DW5, GFX8+ DW6 */ -# define MEDIA_BARRIER_ENABLE_SHIFT 21 -# define MEDIA_BARRIER_ENABLE_MASK INTEL_MASK(21, 21) -# define MEDIA_SHARED_LOCAL_MEMORY_SIZE_SHIFT 16 -# define MEDIA_SHARED_LOCAL_MEMORY_SIZE_MASK INTEL_MASK(20, 16) -# define MEDIA_GPGPU_THREAD_COUNT_SHIFT 0 -# define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0) -# define GFX8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0 -# define GFX8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0) -/* GFX7 DW6, GFX8+ DW7 */ -# define CROSS_THREAD_READ_LENGTH_SHIFT 0 -# define CROSS_THREAD_READ_LENGTH_MASK INTEL_MASK(7, 0) -#define MEDIA_STATE_FLUSH 0x7004 -#define GPGPU_WALKER 0x7105 -/* GFX7 DW0 */ -# define GFX7_GPGPU_INDIRECT_PARAMETER_ENABLE (1 << 10) -# define GFX7_GPGPU_PREDICATE_ENABLE (1 << 8) -/* GFX8+ DW2 */ -# define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0 -# define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0) -/* GFX7 DW2, GFX8+ DW4 */ -# define 
GPGPU_WALKER_SIMD_SIZE_SHIFT 30 -# define GPGPU_WALKER_SIMD_SIZE_MASK INTEL_MASK(31, 30) -# define GPGPU_WALKER_THREAD_DEPTH_MAX_SHIFT 16 -# define GPGPU_WALKER_THREAD_DEPTH_MAX_MASK INTEL_MASK(21, 16) -# define GPGPU_WALKER_THREAD_HEIGHT_MAX_SHIFT 8 -# define GPGPU_WALKER_THREAD_HEIGHT_MAX_MASK INTEL_MASK(31, 8) -# define GPGPU_WALKER_THREAD_WIDTH_MAX_SHIFT 0 -# define GPGPU_WALKER_THREAD_WIDTH_MAX_MASK INTEL_MASK(5, 0) - -#define CMD_MI (0x0 << 29) -#define CMD_2D (0x2 << 29) -#define CMD_3D (0x3 << 29) - -#define MI_NOOP (CMD_MI | 0) - -#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) - -#define MI_FLUSH (CMD_MI | (4 << 23)) -#define FLUSH_MAP_CACHE (1 << 0) -#define INHIBIT_FLUSH_RENDER_CACHE (1 << 2) - -#define MI_STORE_DATA_IMM (CMD_MI | (0x20 << 23)) -#define MI_LOAD_REGISTER_IMM (CMD_MI | (0x22 << 23)) -#define MI_LOAD_REGISTER_REG (CMD_MI | (0x2A << 23)) - -#define MI_FLUSH_DW (CMD_MI | (0x26 << 23)) - -#define MI_STORE_REGISTER_MEM (CMD_MI | (0x24 << 23)) -# define MI_STORE_REGISTER_MEM_USE_GGTT (1 << 22) -# define MI_STORE_REGISTER_MEM_PREDICATE (1 << 21) - -/* Load a value from memory into a register. Only available on Gfx7+. */ -#define GFX7_MI_LOAD_REGISTER_MEM (CMD_MI | (0x29 << 23)) -# define MI_LOAD_REGISTER_MEM_USE_GGTT (1 << 22) - -/* Manipulate the predicate bit based on some register values. Only on Gfx7+ */ -#define GFX7_MI_PREDICATE (CMD_MI | (0xC << 23)) -# define MI_PREDICATE_LOADOP_KEEP (0 << 6) -# define MI_PREDICATE_LOADOP_LOAD (2 << 6) -# define MI_PREDICATE_LOADOP_LOADINV (3 << 6) -# define MI_PREDICATE_COMBINEOP_SET (0 << 3) -# define MI_PREDICATE_COMBINEOP_AND (1 << 3) -# define MI_PREDICATE_COMBINEOP_OR (2 << 3) -# define MI_PREDICATE_COMBINEOP_XOR (3 << 3) -# define MI_PREDICATE_COMPAREOP_TRUE (0 << 0) -# define MI_PREDICATE_COMPAREOP_FALSE (1 << 0) -# define MI_PREDICATE_COMPAREOP_SRCS_EQUAL (2 << 0) -# define MI_PREDICATE_COMPAREOP_DELTAS_EQUAL (3 << 0) - -#define HSW_MI_MATH (CMD_MI | (0x1a << 23)) - -#define MI_MATH_ALU2(opcode, operand1, operand2) \ - ( ((MI_MATH_OPCODE_##opcode) << 20) | ((MI_MATH_OPERAND_##operand1) << 10) | \ - ((MI_MATH_OPERAND_##operand2) << 0) ) - -#define MI_MATH_ALU1(opcode, operand1) \ - ( ((MI_MATH_OPCODE_##opcode) << 20) | ((MI_MATH_OPERAND_##operand1) << 10) ) - -#define MI_MATH_ALU0(opcode) \ - ( ((MI_MATH_OPCODE_##opcode) << 20) ) - -#define MI_MATH_OPCODE_NOOP 0x000 -#define MI_MATH_OPCODE_LOAD 0x080 -#define MI_MATH_OPCODE_LOADINV 0x480 -#define MI_MATH_OPCODE_LOAD0 0x081 -#define MI_MATH_OPCODE_LOAD1 0x481 -#define MI_MATH_OPCODE_ADD 0x100 -#define MI_MATH_OPCODE_SUB 0x101 -#define MI_MATH_OPCODE_AND 0x102 -#define MI_MATH_OPCODE_OR 0x103 -#define MI_MATH_OPCODE_XOR 0x104 -#define MI_MATH_OPCODE_STORE 0x180 -#define MI_MATH_OPCODE_STOREINV 0x580 - -#define MI_MATH_OPERAND_R0 0x00 -#define MI_MATH_OPERAND_R1 0x01 -#define MI_MATH_OPERAND_R2 0x02 -#define MI_MATH_OPERAND_R3 0x03 -#define MI_MATH_OPERAND_R4 0x04 -#define MI_MATH_OPERAND_SRCA 0x20 -#define MI_MATH_OPERAND_SRCB 0x21 -#define MI_MATH_OPERAND_ACCU 0x31 -#define MI_MATH_OPERAND_ZF 0x32 -#define MI_MATH_OPERAND_CF 0x33 - -#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22)) - -#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22)) - -#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22)) - -#define XY_FAST_COPY_BLT_CMD (CMD_2D | (0x42 << 22)) - -#define XY_TEXT_IMMEDIATE_BLIT_CMD (CMD_2D | (0x31 << 22)) -# define XY_TEXT_BYTE_PACKED (1 << 16) - -/* BR00 */ -#define XY_BLT_WRITE_ALPHA (1 << 21) -#define XY_BLT_WRITE_RGB (1 << 20) -#define XY_SRC_TILED (1 << 15) -#define 
XY_DST_TILED (1 << 11) - -/* BR00 */ -#define XY_FAST_SRC_TILED_64K (3 << 20) -#define XY_FAST_SRC_TILED_Y (2 << 20) -#define XY_FAST_SRC_TILED_X (1 << 20) - -#define XY_FAST_DST_TILED_64K (3 << 13) -#define XY_FAST_DST_TILED_Y (2 << 13) -#define XY_FAST_DST_TILED_X (1 << 13) - -/* BR13 */ -#define BR13_8 (0x0 << 24) -#define BR13_565 (0x1 << 24) -#define BR13_8888 (0x3 << 24) -#define BR13_16161616 (0x4 << 24) -#define BR13_32323232 (0x5 << 24) - -#define GFX6_SO_PRIM_STORAGE_NEEDED 0x2280 -#define GFX7_SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8) - -#define GFX6_SO_NUM_PRIMS_WRITTEN 0x2288 -#define GFX7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8) - -#define GFX7_SO_WRITE_OFFSET(n) (0x5280 + (n) * 4) - -#define TIMESTAMP 0x2358 - -#define BCS_SWCTRL 0x22200 -# define BCS_SWCTRL_SRC_Y (1 << 0) -# define BCS_SWCTRL_DST_Y (1 << 1) - -#define OACONTROL 0x2360 -# define OACONTROL_COUNTER_SELECT_SHIFT 2 -# define OACONTROL_ENABLE_COUNTERS (1 << 0) - -/* Auto-Draw / Indirect Registers */ -#define GFX7_3DPRIM_END_OFFSET 0x2420 -#define GFX7_3DPRIM_START_VERTEX 0x2430 -#define GFX7_3DPRIM_VERTEX_COUNT 0x2434 -#define GFX7_3DPRIM_INSTANCE_COUNT 0x2438 -#define GFX7_3DPRIM_START_INSTANCE 0x243C -#define GFX7_3DPRIM_BASE_VERTEX 0x2440 - -/* Auto-Compute / Indirect Registers */ -#define GFX7_GPGPU_DISPATCHDIMX 0x2500 -#define GFX7_GPGPU_DISPATCHDIMY 0x2504 -#define GFX7_GPGPU_DISPATCHDIMZ 0x2508 - -#define GFX7_CACHE_MODE_0 0x7000 -#define GFX7_CACHE_MODE_1 0x7004 -# define GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) -# define GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT (1 << 9) -# define GFX8_HIZ_NP_PMA_FIX_ENABLE (1 << 11) -# define GFX8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13) -# define GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC (1 << 1) -# define GFX8_HIZ_PMA_MASK_BITS \ - REG_MASK(GFX8_HIZ_NP_PMA_FIX_ENABLE | GFX8_HIZ_NP_EARLY_Z_FAILS_DISABLE) -# define GFX11_DISABLE_REPACKING_FOR_COMPRESSION (1 << 15) - -#define GFX7_GT_MODE 0x7008 -# define GFX9_SUBSLICE_HASHING_8x8 (0 << 8) -# define GFX9_SUBSLICE_HASHING_16x4 (1 << 8) -# define GFX9_SUBSLICE_HASHING_8x4 (2 << 8) -# define GFX9_SUBSLICE_HASHING_16x16 (3 << 8) -# define GFX9_SUBSLICE_HASHING_MASK_BITS REG_MASK(3 << 8) -# define GFX9_SLICE_HASHING_NORMAL (0 << 11) -# define GFX9_SLICE_HASHING_DISABLED (1 << 11) -# define GFX9_SLICE_HASHING_32x16 (2 << 11) -# define GFX9_SLICE_HASHING_32x32 (3 << 11) -# define GFX9_SLICE_HASHING_MASK_BITS REG_MASK(3 << 11) - -/* Predicate registers */ -#define MI_PREDICATE_SRC0 0x2400 -#define MI_PREDICATE_SRC1 0x2408 -#define MI_PREDICATE_DATA 0x2410 -#define MI_PREDICATE_RESULT 0x2418 -#define MI_PREDICATE_RESULT_1 0x241C -#define MI_PREDICATE_RESULT_2 0x2214 - -#define HSW_CS_GPR(n) (0x2600 + (n) * 8) - -/* L3 cache control registers. */ -#define GFX7_L3SQCREG1 0xb010 -/* L3SQ general and high priority credit initialization. 
*/ -# define IVB_L3SQCREG1_SQGHPCI_DEFAULT 0x00730000 -# define VLV_L3SQCREG1_SQGHPCI_DEFAULT 0x00d30000 -# define HSW_L3SQCREG1_SQGHPCI_DEFAULT 0x00610000 -# define GFX7_L3SQCREG1_CONV_DC_UC (1 << 24) -# define GFX7_L3SQCREG1_CONV_IS_UC (1 << 25) -# define GFX7_L3SQCREG1_CONV_C_UC (1 << 26) -# define GFX7_L3SQCREG1_CONV_T_UC (1 << 27) - -#define GFX7_L3CNTLREG2 0xb020 -# define GFX7_L3CNTLREG2_SLM_ENABLE (1 << 0) -# define GFX7_L3CNTLREG2_URB_ALLOC_SHIFT 1 -# define GFX7_L3CNTLREG2_URB_ALLOC_MASK INTEL_MASK(6, 1) -# define GFX7_L3CNTLREG2_URB_LOW_BW (1 << 7) -# define GFX7_L3CNTLREG2_ALL_ALLOC_SHIFT 8 -# define GFX7_L3CNTLREG2_ALL_ALLOC_MASK INTEL_MASK(13, 8) -# define GFX7_L3CNTLREG2_RO_ALLOC_SHIFT 14 -# define GFX7_L3CNTLREG2_RO_ALLOC_MASK INTEL_MASK(19, 14) -# define GFX7_L3CNTLREG2_RO_LOW_BW (1 << 20) -# define GFX7_L3CNTLREG2_DC_ALLOC_SHIFT 21 -# define GFX7_L3CNTLREG2_DC_ALLOC_MASK INTEL_MASK(26, 21) -# define GFX7_L3CNTLREG2_DC_LOW_BW (1 << 27) - -#define GFX7_L3CNTLREG3 0xb024 -# define GFX7_L3CNTLREG3_IS_ALLOC_SHIFT 1 -# define GFX7_L3CNTLREG3_IS_ALLOC_MASK INTEL_MASK(6, 1) -# define GFX7_L3CNTLREG3_IS_LOW_BW (1 << 7) -# define GFX7_L3CNTLREG3_C_ALLOC_SHIFT 8 -# define GFX7_L3CNTLREG3_C_ALLOC_MASK INTEL_MASK(13, 8) -# define GFX7_L3CNTLREG3_C_LOW_BW (1 << 14) -# define GFX7_L3CNTLREG3_T_ALLOC_SHIFT 15 -# define GFX7_L3CNTLREG3_T_ALLOC_MASK INTEL_MASK(20, 15) -# define GFX7_L3CNTLREG3_T_LOW_BW (1 << 21) - -#define HSW_SCRATCH1 0xb038 -#define HSW_SCRATCH1_L3_ATOMIC_DISABLE (1 << 27) - -#define HSW_ROW_CHICKEN3 0xe49c -#define HSW_ROW_CHICKEN3_L3_ATOMIC_DISABLE (1 << 6) - -#define GFX8_L3CNTLREG 0x7034 -# define GFX8_L3CNTLREG_SLM_ENABLE (1 << 0) -# define GFX8_L3CNTLREG_URB_ALLOC_SHIFT 1 -# define GFX8_L3CNTLREG_URB_ALLOC_MASK INTEL_MASK(7, 1) -# define GFX8_L3CNTLREG_RO_ALLOC_SHIFT 11 -# define GFX8_L3CNTLREG_RO_ALLOC_MASK INTEL_MASK(17, 11) -# define GFX8_L3CNTLREG_DC_ALLOC_SHIFT 18 -# define GFX8_L3CNTLREG_DC_ALLOC_MASK INTEL_MASK(24, 18) -# define GFX8_L3CNTLREG_ALL_ALLOC_SHIFT 25 -# define GFX8_L3CNTLREG_ALL_ALLOC_MASK INTEL_MASK(31, 25) -# define GFX8_L3CNTLREG_EDBC_NO_HANG (1 << 9) -# define GFX11_L3CNTLREG_USE_FULL_WAYS (1 << 10) - -#define GFX10_CACHE_MODE_SS 0x0e420 -#define GFX10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) - -#define INSTPM 0x20c0 -# define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6) - -#define CS_DEBUG_MODE2 0x20d8 /* Gfx9+ */ -# define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4) - -#define SLICE_COMMON_ECO_CHICKEN1 0x731c /* Gfx9+ */ -# define GLK_SCEC_BARRIER_MODE_GPGPU (0 << 7) -# define GLK_SCEC_BARRIER_MODE_3D_HULL (1 << 7) -# define GLK_SCEC_BARRIER_MODE_MASK REG_MASK(1 << 7) -# define GFX11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE (1 << 11) - -#define HALF_SLICE_CHICKEN7 0xE194 -# define TEXEL_OFFSET_FIX_ENABLE (1 << 1) -# define TEXEL_OFFSET_FIX_MASK REG_MASK(1 << 1) - -#define GFX11_SAMPLER_MODE 0xE18C -# define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS (1 << 5) -# define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK REG_MASK(1 << 5) - -#define CS_CHICKEN1 0x2580 /* Gfx9+ */ -# define GFX9_REPLAY_MODE_MIDBUFFER (0 << 0) -# define GFX9_REPLAY_MODE_MIDOBJECT (1 << 0) -# define GFX9_REPLAY_MODE_MASK REG_MASK(1 << 0) - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c b/src/mesa/drivers/dri/i965/brw_disk_cache.c deleted file mode 100644 index b3fcb5e..0000000 --- a/src/mesa/drivers/dri/i965/brw_disk_cache.c +++ /dev/null @@ -1,417 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby 
granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "compiler/glsl/ir_uniform.h" -#include "compiler/glsl/shader_cache.h" -#include "main/mtypes.h" -#include "util/blob.h" -#include "util/build_id.h" -#include "util/debug.h" -#include "util/disk_cache.h" -#include "util/macros.h" -#include "util/mesa-sha1.h" - -#include "compiler/brw_eu.h" -#include "dev/intel_debug.h" - -#include "brw_context.h" -#include "brw_program.h" -#include "brw_cs.h" -#include "brw_gs.h" -#include "brw_state.h" -#include "brw_vs.h" -#include "brw_wm.h" - -static bool -debug_enabled_for_stage(gl_shader_stage stage) -{ - static const uint64_t stage_debug_flags[] = { - DEBUG_VS, DEBUG_TCS, DEBUG_TES, DEBUG_GS, DEBUG_WM, DEBUG_CS, - }; - assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_debug_flags)); - return INTEL_DEBUG(stage_debug_flags[stage]); -} - -static void -intel_shader_sha1(struct gl_program *prog, gl_shader_stage stage, - void *key, unsigned char *out_sha1) -{ - char sha1_buf[41]; - unsigned char sha1[20]; - char manifest[256]; - int offset = 0; - - _mesa_sha1_format(sha1_buf, prog->sh.data->sha1); - offset += snprintf(manifest, sizeof(manifest), "program: %s\n", sha1_buf); - - _mesa_sha1_compute(key, brw_prog_key_size(stage), sha1); - _mesa_sha1_format(sha1_buf, sha1); - offset += snprintf(manifest + offset, sizeof(manifest) - offset, - "%s_key: %s\n", _mesa_shader_stage_to_abbrev(stage), - sha1_buf); - - _mesa_sha1_compute(manifest, strlen(manifest), out_sha1); -} - -static bool -read_blob_program_data(struct blob_reader *binary, struct gl_program *prog, - gl_shader_stage stage, const uint8_t **program, - struct brw_stage_prog_data *prog_data) -{ - return - brw_read_blob_program_data(binary, prog, stage, program, prog_data) && - (binary->current == binary->end); -} - -static bool -read_and_upload(struct brw_context *brw, struct disk_cache *cache, - struct gl_program *prog, gl_shader_stage stage) -{ - unsigned char binary_sha1[20]; - - union brw_any_prog_key prog_key; - - switch (stage) { - case MESA_SHADER_VERTEX: - brw_vs_populate_key(brw, &prog_key.vs); - break; - case MESA_SHADER_TESS_CTRL: - brw_tcs_populate_key(brw, &prog_key.tcs); - break; - case MESA_SHADER_TESS_EVAL: - brw_tes_populate_key(brw, &prog_key.tes); - break; - case MESA_SHADER_GEOMETRY: - brw_gs_populate_key(brw, &prog_key.gs); - break; - case MESA_SHADER_FRAGMENT: - brw_wm_populate_key(brw, &prog_key.wm); - break; - case MESA_SHADER_COMPUTE: - brw_cs_populate_key(brw, &prog_key.cs); - 
break; - default: - unreachable("Unsupported stage!"); - } - - /* We don't care what instance of the program it is for the disk cache hash - * lookup, so set the id to 0 for the sha1 hashing. program_string_id will - * be set below. - */ - prog_key.base.program_string_id = 0; - - intel_shader_sha1(prog, stage, &prog_key, binary_sha1); - - size_t buffer_size; - uint8_t *buffer = disk_cache_get(cache, binary_sha1, &buffer_size); - if (buffer == NULL) { - if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { - char sha1_buf[41]; - _mesa_sha1_format(sha1_buf, binary_sha1); - fprintf(stderr, "No cached %s binary found for: %s\n", - _mesa_shader_stage_to_abbrev(stage), sha1_buf); - } - return false; - } - - if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { - char sha1_buf[41]; - _mesa_sha1_format(sha1_buf, binary_sha1); - fprintf(stderr, "attempting to populate bo cache with binary: %s\n", - sha1_buf); - } - - struct blob_reader binary; - blob_reader_init(&binary, buffer, buffer_size); - - const uint8_t *program; - struct brw_stage_prog_data *prog_data = - ralloc_size(NULL, sizeof(union brw_any_prog_data)); - if (!read_blob_program_data(&binary, prog, stage, &program, prog_data)) { - /* Something very bad has gone wrong; discard the item from the cache and - * rebuild from source. - */ - if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { - fprintf(stderr, "Error reading program from cache (invalid i965 " - "cache item)\n"); - } - - disk_cache_remove(cache, binary_sha1); - ralloc_free(prog_data); - free(buffer); - return false; - } - - enum brw_cache_id cache_id; - struct brw_stage_state *stage_state; - - switch (stage) { - case MESA_SHADER_VERTEX: - cache_id = BRW_CACHE_VS_PROG; - stage_state = &brw->vs.base; - break; - case MESA_SHADER_TESS_CTRL: - cache_id = BRW_CACHE_TCS_PROG; - stage_state = &brw->tcs.base; - break; - case MESA_SHADER_TESS_EVAL: - cache_id = BRW_CACHE_TES_PROG; - stage_state = &brw->tes.base; - break; - case MESA_SHADER_GEOMETRY: - cache_id = BRW_CACHE_GS_PROG; - stage_state = &brw->gs.base; - break; - case MESA_SHADER_FRAGMENT: - cache_id = BRW_CACHE_FS_PROG; - stage_state = &brw->wm.base; - break; - case MESA_SHADER_COMPUTE: - cache_id = BRW_CACHE_CS_PROG; - stage_state = &brw->cs.base; - break; - default: - unreachable("Unsupported stage!"); - } - - prog_key.base.program_string_id = brw_program(prog)->id; - - brw_alloc_stage_scratch(brw, stage_state, prog_data->total_scratch); - - if (unlikely(debug_enabled_for_stage(stage))) { - fprintf(stderr, "NIR for %s program %d loaded from disk shader cache:\n", - _mesa_shader_stage_to_abbrev(stage), brw_program(prog)->id); - brw_program_deserialize_driver_blob(&brw->ctx, prog, stage); - nir_shader *nir = prog->nir; - nir_print_shader(nir, stderr); - fprintf(stderr, "Native code for %s %s shader %s from disk cache:\n", - nir->info.label ?
nir->info.label : "unnamed", - _mesa_shader_stage_to_string(nir->info.stage), nir->info.name); - brw_disassemble_with_labels(&brw->screen->devinfo, program, 0, - prog_data->program_size, stderr); - } - - brw_upload_cache(&brw->cache, cache_id, &prog_key, brw_prog_key_size(stage), - program, prog_data->program_size, prog_data, - brw_prog_data_size(stage), &stage_state->prog_offset, - &stage_state->prog_data); - - prog->program_written_to_cache = true; - - ralloc_free(prog_data); - free(buffer); - - return true; -} - -bool -brw_disk_cache_upload_program(struct brw_context *brw, gl_shader_stage stage) -{ - struct disk_cache *cache = brw->ctx.Cache; - if (cache == NULL) - return false; - - struct gl_program *prog = brw->ctx._Shader->CurrentProgram[stage]; - if (prog == NULL) - return false; - - if (prog->sh.data->spirv) - return false; - - if (brw->ctx._Shader->Flags & GLSL_CACHE_FALLBACK) - goto fail; - - if (!read_and_upload(brw, cache, prog, stage)) - goto fail; - - if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { - fprintf(stderr, "read gen program from cache\n"); - } - - return true; - -fail: - prog->program_written_to_cache = false; - if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { - fprintf(stderr, "falling back to nir %s.\n", - _mesa_shader_stage_to_abbrev(prog->info.stage)); - } - - brw_program_deserialize_driver_blob(&brw->ctx, prog, stage); - - return false; -} - -static void -write_program_data(struct brw_context *brw, struct gl_program *prog, - void *key, struct brw_stage_prog_data *prog_data, - uint32_t prog_offset, struct disk_cache *cache, - gl_shader_stage stage) -{ - struct blob binary; - blob_init(&binary); - - const void *program_map = brw->cache.map + prog_offset; - /* TODO: Improve perf for non-LLC. It would be best to save it at program - * generation time when the program is in normal cached memory - * accessible to the CPU. Another easier change would be to use - * _mesa_streaming_load_memcpy to read from the program mapped memory.
*/ - brw_write_blob_program_data(&binary, stage, program_map, prog_data); - - unsigned char sha1[20]; - char buf[41]; - intel_shader_sha1(prog, stage, key, sha1); - _mesa_sha1_format(buf, sha1); - if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { - fprintf(stderr, "putting binary in cache: %s\n", buf); - } - - disk_cache_put(cache, sha1, binary.data, binary.size, NULL); - - prog->program_written_to_cache = true; - blob_finish(&binary); -} - -void -brw_disk_cache_write_render_programs(struct brw_context *brw) -{ - struct disk_cache *cache = brw->ctx.Cache; - if (cache == NULL) - return; - - struct gl_program *prog; - gl_shader_stage stage; - for (stage = MESA_SHADER_VERTEX; stage <= MESA_SHADER_FRAGMENT; stage++) { - prog = brw->ctx._Shader->CurrentProgram[stage]; - if (prog && prog->sh.data->spirv) - return; - } - - prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_VERTEX]; - if (prog && !prog->program_written_to_cache) { - struct brw_vs_prog_key vs_key; - brw_vs_populate_key(brw, &vs_key); - vs_key.base.program_string_id = 0; - - write_program_data(brw, prog, &vs_key, brw->vs.base.prog_data, - brw->vs.base.prog_offset, cache, - MESA_SHADER_VERTEX); - } - - prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_TESS_CTRL]; - if (prog && !prog->program_written_to_cache) { - struct brw_tcs_prog_key tcs_key; - brw_tcs_populate_key(brw, &tcs_key); - tcs_key.base.program_string_id = 0; - - write_program_data(brw, prog, &tcs_key, brw->tcs.base.prog_data, - brw->tcs.base.prog_offset, cache, - MESA_SHADER_TESS_CTRL); - } - - prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]; - if (prog && !prog->program_written_to_cache) { - struct brw_tes_prog_key tes_key; - brw_tes_populate_key(brw, &tes_key); - tes_key.base.program_string_id = 0; - - write_program_data(brw, prog, &tes_key, brw->tes.base.prog_data, - brw->tes.base.prog_offset, cache, - MESA_SHADER_TESS_EVAL); - } - - prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; - if (prog && !prog->program_written_to_cache) { - struct brw_gs_prog_key gs_key; - brw_gs_populate_key(brw, &gs_key); - gs_key.base.program_string_id = 0; - - write_program_data(brw, prog, &gs_key, brw->gs.base.prog_data, - brw->gs.base.prog_offset, cache, - MESA_SHADER_GEOMETRY); - } - - prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_FRAGMENT]; - if (prog && !prog->program_written_to_cache) { - struct brw_wm_prog_key wm_key; - brw_wm_populate_key(brw, &wm_key); - wm_key.base.program_string_id = 0; - - write_program_data(brw, prog, &wm_key, brw->wm.base.prog_data, - brw->wm.base.prog_offset, cache, - MESA_SHADER_FRAGMENT); - } -} - -void -brw_disk_cache_write_compute_program(struct brw_context *brw) -{ - struct disk_cache *cache = brw->ctx.Cache; - if (cache == NULL) - return; - - struct gl_program *prog = - brw->ctx._Shader->CurrentProgram[MESA_SHADER_COMPUTE]; - - if (prog && prog->sh.data->spirv) - return; - - if (prog && !prog->program_written_to_cache) { - struct brw_cs_prog_key cs_key; - brw_cs_populate_key(brw, &cs_key); - cs_key.base.program_string_id = 0; - - write_program_data(brw, prog, &cs_key, brw->cs.base.prog_data, - brw->cs.base.prog_offset, cache, - MESA_SHADER_COMPUTE); - } -} - -void -brw_disk_cache_init(struct brw_screen *screen) -{ -#ifdef ENABLE_SHADER_CACHE - if (INTEL_DEBUG(DEBUG_DISK_CACHE_DISABLE_MASK)) - return; - - /* array length: print length + null char + 1 extra to verify it is unused */ - char renderer[11]; - ASSERTED int len = snprintf(renderer, sizeof(renderer), "i965_%04x", - screen->deviceID); - assert(len == 
sizeof(renderer) - 2); - - const struct build_id_note *note = - build_id_find_nhdr_for_addr(brw_disk_cache_init); - assert(note && build_id_length(note) == 20 /* sha1 */); - - const uint8_t *id_sha1 = build_id_data(note); - assert(id_sha1); - - char timestamp[41]; - _mesa_sha1_format(timestamp, id_sha1); - - const uint64_t driver_flags = - brw_get_compiler_config_value(screen->compiler); - screen->disk_cache = disk_cache_create(renderer, timestamp, driver_flags); -#endif -} diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c deleted file mode 100644 index 5d4f066..0000000 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ /dev/null @@ -1,1361 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include - -#include "main/arrayobj.h" -#include "main/blend.h" -#include "main/context.h" -#include "main/condrender.h" -#include "main/samplerobj.h" -#include "main/state.h" -#include "main/enums.h" -#include "main/macros.h" -#include "main/transformfeedback.h" -#include "main/framebuffer.h" -#include "main/varray.h" -#include "tnl/tnl.h" -#include "vbo/vbo.h" -#include "swrast/swrast.h" -#include "swrast_setup/swrast_setup.h" -#include "drivers/common/meta.h" -#include "util/bitscan.h" -#include "util/bitset.h" - -#include "brw_blorp.h" -#include "brw_draw.h" -#include "brw_defines.h" -#include "compiler/brw_eu_defines.h" -#include "brw_context.h" -#include "brw_state.h" - -#include "brw_batch.h" -#include "brw_buffers.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" -#include "brw_buffer_objects.h" - -#define FILE_DEBUG_FLAG DEBUG_PRIMS - - -static const GLenum reduced_prim[GL_POLYGON+1] = { - [GL_POINTS] = GL_POINTS, - [GL_LINES] = GL_LINES, - [GL_LINE_LOOP] = GL_LINES, - [GL_LINE_STRIP] = GL_LINES, - [GL_TRIANGLES] = GL_TRIANGLES, - [GL_TRIANGLE_STRIP] = GL_TRIANGLES, - [GL_TRIANGLE_FAN] = GL_TRIANGLES, - [GL_QUADS] = GL_TRIANGLES, - [GL_QUAD_STRIP] = GL_TRIANGLES, - [GL_POLYGON] = GL_TRIANGLES -}; - -/* When the primitive changes, set a state bit and re-validate. Not - * the nicest and would rather deal with this by having all the - * programs be immune to the active primitive (ie. cope with all - * possibilities). That may not be realistic however. 
- */ -static void -brw_set_prim(struct brw_context *brw, const struct _mesa_prim *prim) -{ - struct gl_context *ctx = &brw->ctx; - uint32_t hw_prim = get_hw_prim_for_gl_prim(prim->mode); - - DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode)); - - /* Slight optimization to avoid the GS program when not needed: - */ - if (prim->mode == GL_QUAD_STRIP && - ctx->Light.ShadeModel != GL_FLAT && - ctx->Polygon.FrontMode == GL_FILL && - ctx->Polygon.BackMode == GL_FILL) - hw_prim = _3DPRIM_TRISTRIP; - - if (prim->mode == GL_QUADS && prim->count == 4 && - ctx->Light.ShadeModel != GL_FLAT && - ctx->Polygon.FrontMode == GL_FILL && - ctx->Polygon.BackMode == GL_FILL) { - hw_prim = _3DPRIM_TRIFAN; - } - - if (hw_prim != brw->primitive) { - brw->primitive = hw_prim; - brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE; - - if (reduced_prim[prim->mode] != brw->reduced_primitive) { - brw->reduced_primitive = reduced_prim[prim->mode]; - brw->ctx.NewDriverState |= BRW_NEW_REDUCED_PRIMITIVE; - } - } -} - -static void -gfx6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim) -{ - const struct gl_context *ctx = &brw->ctx; - uint32_t hw_prim; - - DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode)); - - if (prim->mode == GL_PATCHES) { - hw_prim = _3DPRIM_PATCHLIST(ctx->TessCtrlProgram.patch_vertices); - } else { - hw_prim = get_hw_prim_for_gl_prim(prim->mode); - } - - if (hw_prim != brw->primitive) { - brw->primitive = hw_prim; - brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE; - if (prim->mode == GL_PATCHES) - brw->ctx.NewDriverState |= BRW_NEW_PATCH_PRIMITIVE; - } -} - - -/** - * The hardware is capable of removing dangling vertices on its own; however, - * prior to Gfx6, we sometimes convert quads into trifans (and quad strips - * into tristrips), since pre-Gfx6 hardware requires a GS to render quads. - * This function manually trims dangling vertices from a draw call involving - * quads so that those dangling vertices won't get drawn when we convert to - * trifans/tristrips. - */ -static GLuint -trim(GLenum prim, GLuint length) -{ - if (prim == GL_QUAD_STRIP) - return length > 3 ? (length - length % 2) : 0; - else if (prim == GL_QUADS) - return length - length % 4; - else - return length; -} - - -static void -brw_emit_prim(struct brw_context *brw, - const struct _mesa_prim *prim, - uint32_t hw_prim, - bool is_indexed, - GLuint num_instances, GLuint base_instance, - struct brw_transform_feedback_object *xfb_obj, - unsigned stream, - bool is_indirect, - GLsizeiptr indirect_offset) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - int verts_per_instance; - int vertex_access_type; - int indirect_flag; - - DBG("PRIM: %s %d %d\n", _mesa_enum_to_string(prim->mode), - prim->start, prim->count); - - int start_vertex_location = prim->start; - int base_vertex_location = prim->basevertex; - - if (is_indexed) { - vertex_access_type = devinfo->ver >= 7 ? - GFX7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM : - GFX4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM; - start_vertex_location += brw->ib.start_vertex_offset; - base_vertex_location += brw->vb.start_vertex_bias; - } else { - vertex_access_type = devinfo->ver >= 7 ? - GFX7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL : - GFX4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL; - start_vertex_location += brw->vb.start_vertex_bias; - } - - /* We only need to trim the primitive count on pre-Gfx6. */ - if (devinfo->ver < 6) - verts_per_instance = trim(prim->mode, prim->count); - else - verts_per_instance = prim->count; - - /* If nothing to emit, just return. 
*/ - if (verts_per_instance == 0 && !is_indirect && !xfb_obj) - return; - - /* If we're set to always flush, do it before and after the primitive emit. - * We want to catch both missed flushes that hurt instruction/state cache - * and missed flushes of the render cache as it heads to other parts of - * the besides the draw code. - */ - if (brw->always_flush_cache) - brw_emit_mi_flush(brw); - - /* If indirect, emit a bunch of loads from the indirect BO. */ - if (xfb_obj) { - indirect_flag = GFX7_3DPRIM_INDIRECT_PARAMETER_ENABLE; - - brw_load_register_mem(brw, GFX7_3DPRIM_VERTEX_COUNT, - xfb_obj->prim_count_bo, - stream * sizeof(uint32_t)); - BEGIN_BATCH(9); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (9 - 2)); - OUT_BATCH(GFX7_3DPRIM_INSTANCE_COUNT); - OUT_BATCH(num_instances); - OUT_BATCH(GFX7_3DPRIM_START_VERTEX); - OUT_BATCH(0); - OUT_BATCH(GFX7_3DPRIM_BASE_VERTEX); - OUT_BATCH(0); - OUT_BATCH(GFX7_3DPRIM_START_INSTANCE); - OUT_BATCH(0); - ADVANCE_BATCH(); - } else if (is_indirect) { - struct gl_buffer_object *indirect_buffer = brw->ctx.DrawIndirectBuffer; - struct brw_bo *bo = brw_bufferobj_buffer(brw, - brw_buffer_object(indirect_buffer), - indirect_offset, 5 * sizeof(GLuint), false); - - indirect_flag = GFX7_3DPRIM_INDIRECT_PARAMETER_ENABLE; - - brw_load_register_mem(brw, GFX7_3DPRIM_VERTEX_COUNT, bo, - indirect_offset + 0); - brw_load_register_mem(brw, GFX7_3DPRIM_INSTANCE_COUNT, bo, - indirect_offset + 4); - - brw_load_register_mem(brw, GFX7_3DPRIM_START_VERTEX, bo, - indirect_offset + 8); - if (is_indexed) { - brw_load_register_mem(brw, GFX7_3DPRIM_BASE_VERTEX, bo, - indirect_offset + 12); - brw_load_register_mem(brw, GFX7_3DPRIM_START_INSTANCE, bo, - indirect_offset + 16); - } else { - brw_load_register_mem(brw, GFX7_3DPRIM_START_INSTANCE, bo, - indirect_offset + 12); - brw_load_register_imm32(brw, GFX7_3DPRIM_BASE_VERTEX, 0); - } - } else { - indirect_flag = 0; - } - - BEGIN_BATCH(devinfo->ver >= 7 ? 7 : 6); - - if (devinfo->ver >= 7) { - const int predicate_enable = - (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) - ? 
GFX7_3DPRIM_PREDICATE_ENABLE : 0; - - OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable); - OUT_BATCH(hw_prim | vertex_access_type); - } else { - OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | - hw_prim << GFX4_3DPRIM_TOPOLOGY_TYPE_SHIFT | - vertex_access_type); - } - OUT_BATCH(verts_per_instance); - OUT_BATCH(start_vertex_location); - OUT_BATCH(num_instances); - OUT_BATCH(base_instance); - OUT_BATCH(base_vertex_location); - ADVANCE_BATCH(); - - if (brw->always_flush_cache) - brw_emit_mi_flush(brw); -} - - -static void -brw_clear_buffers(struct brw_context *brw) -{ - for (unsigned i = 0; i < brw->vb.nr_buffers; ++i) { - brw_bo_unreference(brw->vb.buffers[i].bo); - brw->vb.buffers[i].bo = NULL; - } - brw->vb.nr_buffers = 0; - - for (unsigned i = 0; i < brw->vb.nr_enabled; ++i) { - brw->vb.enabled[i]->buffer = -1; - } -#ifndef NDEBUG - for (unsigned i = 0; i < VERT_ATTRIB_MAX; i++) { - assert(brw->vb.inputs[i].buffer == -1); - } -#endif -} - - -static uint8_t get_wa_flags(const struct gl_vertex_format *glformat) -{ - uint8_t wa_flags = 0; - - switch (glformat->Type) { - case GL_FIXED: - wa_flags = glformat->Size; - break; - - case GL_INT_2_10_10_10_REV: - wa_flags |= BRW_ATTRIB_WA_SIGN; - FALLTHROUGH; - - case GL_UNSIGNED_INT_2_10_10_10_REV: - if (glformat->Format == GL_BGRA) - wa_flags |= BRW_ATTRIB_WA_BGRA; - - if (glformat->Normalized) - wa_flags |= BRW_ATTRIB_WA_NORMALIZE; - else if (!glformat->Integer) - wa_flags |= BRW_ATTRIB_WA_SCALE; - - break; - } - - return wa_flags; -} - - -static void -brw_merge_inputs(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct gl_context *ctx = &brw->ctx; - - if (devinfo->verx10 <= 70) { - /* Prior to Haswell, the hardware can't natively support GL_FIXED or - * 2_10_10_10_REV vertex formats. Set appropriate workaround flags. - */ - const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO; - const uint64_t vs_inputs = ctx->VertexProgram._Current->info.inputs_read; - assert((vs_inputs & ~((uint64_t)VERT_BIT_ALL)) == 0); - - unsigned vaomask = vs_inputs & _mesa_draw_array_bits(ctx); - while (vaomask) { - const gl_vert_attrib i = u_bit_scan(&vaomask); - const uint8_t wa_flags = - get_wa_flags(_mesa_draw_array_format(vao, i)); - - if (brw->vb.attrib_wa_flags[i] != wa_flags) { - brw->vb.attrib_wa_flags[i] = wa_flags; - brw->ctx.NewDriverState |= BRW_NEW_VS_ATTRIB_WORKAROUNDS; - } - } - - unsigned currmask = vs_inputs & _mesa_draw_current_bits(ctx); - while (currmask) { - const gl_vert_attrib i = u_bit_scan(&currmask); - const uint8_t wa_flags = - get_wa_flags(_mesa_draw_current_format(ctx, i)); - - if (brw->vb.attrib_wa_flags[i] != wa_flags) { - brw->vb.attrib_wa_flags[i] = wa_flags; - brw->ctx.NewDriverState |= BRW_NEW_VS_ATTRIB_WORKAROUNDS; - } - } - } -} - -/* Disable auxiliary buffers if a renderbuffer is also bound as a texture - * or shader image. This causes a self-dependency, where both rendering - * and sampling may concurrently read or write the CCS buffer, causing - * incorrect pixels. - */ -static bool -brw_disable_rb_aux_buffer(struct brw_context *brw, - bool *draw_aux_buffer_disabled, - struct brw_mipmap_tree *tex_mt, - unsigned min_level, unsigned num_levels, - const char *usage) -{ - const struct gl_framebuffer *fb = brw->ctx.DrawBuffer; - bool found = false; - - /* We only need to worry about color compression and fast clears. 
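The indirect register loads in brw_emit_prim() above walk the standard GL indirect draw records from ARB_draw_indirect; the byte offsets passed to brw_load_register_mem() line up with these fields (the struct and field names here are illustrative):

#include <stdint.h>

typedef struct {            /* glDrawElementsIndirect, 5 * sizeof(GLuint) */
   uint32_t count;          /* +0  -> GFX7_3DPRIM_VERTEX_COUNT   */
   uint32_t instance_count; /* +4  -> GFX7_3DPRIM_INSTANCE_COUNT */
   uint32_t first_index;    /* +8  -> GFX7_3DPRIM_START_VERTEX   */
   int32_t  base_vertex;    /* +12 -> GFX7_3DPRIM_BASE_VERTEX    */
   uint32_t base_instance;  /* +16 -> GFX7_3DPRIM_START_INSTANCE */
} indexed_indirect_cmd;

typedef struct {            /* glDrawArraysIndirect, 4 * sizeof(GLuint) */
   uint32_t count;          /* +0  -> GFX7_3DPRIM_VERTEX_COUNT   */
   uint32_t instance_count; /* +4  -> GFX7_3DPRIM_INSTANCE_COUNT */
   uint32_t first;          /* +8  -> GFX7_3DPRIM_START_VERTEX   */
   uint32_t base_instance;  /* +12 -> GFX7_3DPRIM_START_INSTANCE */
} array_indirect_cmd;

The non-indexed branch has no base-vertex field in the record, which is why the code loads START_INSTANCE from offset +12 and writes BASE_VERTEX as an immediate zero.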
*/ - if (tex_mt->aux_usage != ISL_AUX_USAGE_CCS_D && - tex_mt->aux_usage != ISL_AUX_USAGE_CCS_E) - return false; - - for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { - const struct brw_renderbuffer *irb = - brw_renderbuffer(fb->_ColorDrawBuffers[i]); - - if (irb && irb->mt->bo == tex_mt->bo && - irb->mt_level >= min_level && - irb->mt_level < min_level + num_levels) { - found = draw_aux_buffer_disabled[i] = true; - } - } - - if (found) { - perf_debug("Disabling CCS because a renderbuffer is also bound %s.\n", - usage); - } - - return found; -} - -/** Implement the ASTC 5x5 sampler workaround - * - * Gfx9 sampling hardware has a bug where an ASTC 5x5 compressed surface - * cannot live in the sampler cache at the same time as an aux compressed - * surface. In order to work around the bug we have to stall rendering with a - * CS and pixel scoreboard stall (implicit in the CS stall) and invalidate the - * texture cache whenever one of ASTC 5x5 or aux compressed may be in the - * sampler cache and we're about to render with something which samples from - * the other. - * - * In the case of a single shader which textures from both ASTC 5x5 and - * a texture which is CCS or HiZ compressed, we have to resolve the aux - * compressed texture prior to rendering. This second part is handled in - * brw_predraw_resolve_inputs() below. - * - * We have observed this issue to affect CCS and HiZ sampling but whether or - * not it also affects MCS is unknown. Because MCS has no concept of a - * resolve (and doing one would be stupid expensive), we choose to simply - * ignore the possibility and hope for the best. - */ -static void -gfx9_apply_astc5x5_wa_flush(struct brw_context *brw, - enum gfx9_astc5x5_wa_tex_type curr_mask) -{ - assert(brw->screen->devinfo.ver == 9); - - if (((brw->gfx9_astc5x5_wa_tex_mask & GFX9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) && - (curr_mask & GFX9_ASTC5X5_WA_TEX_TYPE_AUX)) || - ((brw->gfx9_astc5x5_wa_tex_mask & GFX9_ASTC5X5_WA_TEX_TYPE_AUX) && - (curr_mask & GFX9_ASTC5X5_WA_TEX_TYPE_ASTC5x5))) { - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL); - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); - } - - brw->gfx9_astc5x5_wa_tex_mask = curr_mask; -} - -static enum gfx9_astc5x5_wa_tex_type -gfx9_astc5x5_wa_bits(mesa_format format, enum isl_aux_usage aux_usage) -{ - if (aux_usage != ISL_AUX_USAGE_NONE && - aux_usage != ISL_AUX_USAGE_MCS) - return GFX9_ASTC5X5_WA_TEX_TYPE_AUX; - - if (format == MESA_FORMAT_RGBA_ASTC_5x5 || - format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5) - return GFX9_ASTC5X5_WA_TEX_TYPE_ASTC5x5; - - return 0; -} - -/* Helper for the gfx9 ASTC 5x5 workaround. This version exists for BLORP's - * use-cases where only a single texture is bound. - */ -void -gfx9_apply_single_tex_astc5x5_wa(struct brw_context *brw, - mesa_format format, - enum isl_aux_usage aux_usage) -{ - gfx9_apply_astc5x5_wa_flush(brw, gfx9_astc5x5_wa_bits(format, aux_usage)); -} - -static void -mark_textures_used_for_txf(BITSET_WORD *used_for_txf, - const struct gl_program *prog) -{ - if (!prog) - return; - - unsigned s; - BITSET_FOREACH_SET(s, prog->info.textures_used_by_txf, 32) - BITSET_SET(used_for_txf, prog->SamplerUnits[s]); -} - -/** - * \brief Resolve buffers before drawing. - * - * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each - * enabled depth texture, and flush the render cache for any dirty textures. 
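A CPU-side model of the flush condition in gfx9_apply_astc5x5_wa_flush() above: the CS stall plus texture-cache invalidate is needed exactly when the previously-tracked texture class and the incoming one would mix ASTC 5x5 with aux compression in the sampler cache (a sketch; the mask bits mirror the GFX9_ASTC5X5_WA_TEX_TYPE_* flags):

#include <stdbool.h>
#include <stdio.h>

#define TEX_ASTC5X5 (1u << 0)
#define TEX_AUX     (1u << 1)

/* Flush only on a transition that puts both classes in the cache. */
static bool
needs_flush(unsigned prev_mask, unsigned curr_mask)
{
   return ((prev_mask & TEX_ASTC5X5) && (curr_mask & TEX_AUX)) ||
          ((prev_mask & TEX_AUX) && (curr_mask & TEX_ASTC5X5));
}

int main(void)
{
   printf("%d\n", needs_flush(TEX_ASTC5X5, TEX_AUX)); /* 1: classes meet   */
   printf("%d\n", needs_flush(TEX_AUX, TEX_AUX));     /* 0: same class     */
   return 0;
}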
- */ -void -brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering, - bool *draw_aux_buffer_disabled) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_texture_object *tex_obj; - - BITSET_DECLARE(used_for_txf, MAX_COMBINED_TEXTURE_IMAGE_UNITS); - memset(used_for_txf, 0, sizeof(used_for_txf)); - if (rendering) { - mark_textures_used_for_txf(used_for_txf, ctx->VertexProgram._Current); - mark_textures_used_for_txf(used_for_txf, ctx->TessCtrlProgram._Current); - mark_textures_used_for_txf(used_for_txf, ctx->TessEvalProgram._Current); - mark_textures_used_for_txf(used_for_txf, ctx->GeometryProgram._Current); - mark_textures_used_for_txf(used_for_txf, ctx->FragmentProgram._Current); - } else { - mark_textures_used_for_txf(used_for_txf, ctx->ComputeProgram._Current); - } - - int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit; - - enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits = 0; - if (brw->screen->devinfo.ver == 9) { - /* In order to properly implement the ASTC 5x5 workaround for an - * arbitrary draw or dispatch call, we have to walk the entire list of - * textures looking for ASTC 5x5. If there is any ASTC 5x5 in this draw - * call, all aux compressed textures must be resolved and have aux - * compression disabled while sampling. - */ - for (int i = 0; i <= maxEnabledUnit; i++) { - if (!ctx->Texture.Unit[i]._Current) - continue; - tex_obj = brw_texture_object(ctx->Texture.Unit[i]._Current); - if (!tex_obj || !tex_obj->mt) - continue; - - astc5x5_wa_bits |= gfx9_astc5x5_wa_bits(tex_obj->_Format, - tex_obj->mt->aux_usage); - } - gfx9_apply_astc5x5_wa_flush(brw, astc5x5_wa_bits); - } - - /* Resolve depth buffer and render cache of each enabled texture. */ - for (int i = 0; i <= maxEnabledUnit; i++) { - if (!ctx->Texture.Unit[i]._Current) - continue; - tex_obj = brw_texture_object(ctx->Texture.Unit[i]._Current); - if (!tex_obj || !tex_obj->mt) - continue; - - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, i); - enum isl_format view_format = - translate_tex_format(brw, tex_obj->_Format, sampler->Attrib.sRGBDecode); - - unsigned min_level, min_layer, num_levels, num_layers; - if (tex_obj->base.Immutable) { - min_level = tex_obj->base.Attrib.MinLevel; - num_levels = MIN2(tex_obj->base.Attrib.NumLevels, tex_obj->_MaxLevel + 1); - min_layer = tex_obj->base.Attrib.MinLayer; - num_layers = tex_obj->base.Target != GL_TEXTURE_3D ? - tex_obj->base.Attrib.NumLayers : INTEL_REMAINING_LAYERS; - } else { - min_level = tex_obj->base.Attrib.BaseLevel; - num_levels = tex_obj->_MaxLevel - tex_obj->base.Attrib.BaseLevel + 1; - min_layer = 0; - num_layers = INTEL_REMAINING_LAYERS; - } - - if (rendering) { - brw_disable_rb_aux_buffer(brw, draw_aux_buffer_disabled, - tex_obj->mt, min_level, num_levels, - "for sampling"); - } - - brw_miptree_prepare_texture(brw, tex_obj->mt, view_format, - min_level, num_levels, - min_layer, num_layers, - astc5x5_wa_bits); - - /* If any programs are using it with texelFetch, we may need to also do - * a prepare with an sRGB format to ensure texelFetch works "properly". 
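A worked example of the level-range selection above, applying the same formulas to sample numbers (values here are invented for illustration):

#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
   /* Immutable texture view: MinLevel=2, NumLevels=8, but the effective
    * _MaxLevel is 5, so the view is clamped to 6 levels. */
   unsigned min_level  = 2;                 /* Attrib.MinLevel           */
   unsigned num_levels = MIN2(8u, 5u + 1u); /* MIN2(NumLevels, _MaxLevel+1) */
   printf("immutable: min=%u num=%u\n", min_level, num_levels); /* 2, 6 */

   /* Mutable texture: BaseLevel=1, effective _MaxLevel=4. */
   unsigned base = 1, max = 4;
   printf("mutable:   min=%u num=%u\n", base, max - base + 1);  /* 1, 4 */
   return 0;
}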
- */ - if (BITSET_TEST(used_for_txf, i)) { - enum isl_format txf_format = - translate_tex_format(brw, tex_obj->_Format, GL_DECODE_EXT); - if (txf_format != view_format) { - brw_miptree_prepare_texture(brw, tex_obj->mt, txf_format, - min_level, num_levels, - min_layer, num_layers, - astc5x5_wa_bits); - } - } - - brw_cache_flush_for_read(brw, tex_obj->mt->bo); - - if (tex_obj->base.StencilSampling || - tex_obj->mt->format == MESA_FORMAT_S_UINT8) { - brw_update_r8stencil(brw, tex_obj->mt); - } - - if (brw_miptree_has_etc_shadow(brw, tex_obj->mt) && - tex_obj->mt->shadow_needs_update) { - brw_miptree_update_etc_shadow_levels(brw, tex_obj->mt); - } - } - - /* Resolve color for each active shader image. */ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - const struct gl_program *prog = ctx->_Shader->CurrentProgram[i]; - - if (unlikely(prog && prog->info.num_images)) { - for (unsigned j = 0; j < prog->info.num_images; j++) { - struct gl_image_unit *u = - &ctx->ImageUnits[prog->sh.ImageUnits[j]]; - tex_obj = brw_texture_object(u->TexObj); - - if (tex_obj && tex_obj->mt) { - if (rendering) { - brw_disable_rb_aux_buffer(brw, draw_aux_buffer_disabled, - tex_obj->mt, 0, ~0, - "as a shader image"); - } - - brw_miptree_prepare_image(brw, tex_obj->mt); - - brw_cache_flush_for_read(brw, tex_obj->mt->bo); - } - } - } - } -} - -static void -brw_predraw_resolve_framebuffer(struct brw_context *brw, - bool *draw_aux_buffer_disabled) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_renderbuffer *depth_irb; - - /* Resolve the depth buffer's HiZ buffer. */ - depth_irb = brw_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); - if (depth_irb && depth_irb->mt) { - brw_miptree_prepare_depth(brw, depth_irb->mt, - depth_irb->mt_level, - depth_irb->mt_layer, - depth_irb->layer_count); - } - - /* Resolve color buffers for non-coherent framebuffer fetch. */ - if (!ctx->Extensions.EXT_shader_framebuffer_fetch && - ctx->FragmentProgram._Current && - ctx->FragmentProgram._Current->info.outputs_read) { - const struct gl_framebuffer *fb = ctx->DrawBuffer; - - /* This is only used for non-coherent framebuffer fetch, so we don't - * need to worry about CCS_E and can simply pass 'false' below. 
- */ - assert(brw->screen->devinfo.ver < 9); - - for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { - const struct brw_renderbuffer *irb = - brw_renderbuffer(fb->_ColorDrawBuffers[i]); - - if (irb) { - brw_miptree_prepare_texture(brw, irb->mt, irb->mt->surf.format, - irb->mt_level, 1, - irb->mt_layer, irb->layer_count, - brw->gfx9_astc5x5_wa_tex_mask); - } - } - } - - struct gl_framebuffer *fb = ctx->DrawBuffer; - for (int i = 0; i < fb->_NumColorDrawBuffers; i++) { - struct brw_renderbuffer *irb = - brw_renderbuffer(fb->_ColorDrawBuffers[i]); - - if (irb == NULL || irb->mt == NULL) - continue; - - mesa_format mesa_format = - _mesa_get_render_format(ctx, brw_rb_format(irb)); - enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format); - bool blend_enabled = ctx->Color.BlendEnabled & (1 << i); - enum isl_aux_usage aux_usage = - brw_miptree_render_aux_usage(brw, irb->mt, isl_format, - blend_enabled, - draw_aux_buffer_disabled[i]); - if (brw->draw_aux_usage[i] != aux_usage) { - brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE; - brw->draw_aux_usage[i] = aux_usage; - } - - brw_miptree_prepare_render(brw, irb->mt, irb->mt_level, - irb->mt_layer, irb->layer_count, - aux_usage); - - brw_cache_flush_for_render(brw, irb->mt->bo, - isl_format, aux_usage); - } -} - -/** - * \brief Call this after drawing to mark which buffers need resolving - * - * If the depth buffer was written to and if it has an accompanying HiZ - * buffer, then mark that it needs a depth resolve. - * - * If the stencil buffer was written to then mark that it may need to be - * copied to an R8 texture. - * - * If the color buffer is a multisample window system buffer, then - * mark that it needs a downsample. - * - * Also mark any render targets which will be textured as needing a render - * cache flush. - */ -static void -brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - struct gl_framebuffer *fb = ctx->DrawBuffer; - - struct brw_renderbuffer *front_irb = NULL; - struct brw_renderbuffer *back_irb = brw_get_renderbuffer(fb, BUFFER_BACK_LEFT); - struct brw_renderbuffer *depth_irb = brw_get_renderbuffer(fb, BUFFER_DEPTH); - struct brw_renderbuffer *stencil_irb = brw_get_renderbuffer(fb, BUFFER_STENCIL); - struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH]; - - if (_mesa_is_front_buffer_drawing(fb)) - front_irb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT); - - if (front_irb) - front_irb->need_downsample = true; - if (back_irb) - back_irb->need_downsample = true; - if (depth_irb) { - bool depth_written = brw_depth_writes_enabled(brw); - if (depth_att->Layered) { - brw_miptree_finish_depth(brw, depth_irb->mt, - depth_irb->mt_level, - depth_irb->mt_layer, - depth_irb->layer_count, - depth_written); - } else { - brw_miptree_finish_depth(brw, depth_irb->mt, - depth_irb->mt_level, - depth_irb->mt_layer, 1, - depth_written); - } - if (depth_written) - brw_depth_cache_add_bo(brw, depth_irb->mt->bo); - } - - if (stencil_irb && brw->stencil_write_enabled) { - struct brw_mipmap_tree *stencil_mt = - stencil_irb->mt->stencil_mt != NULL ? 
- stencil_irb->mt->stencil_mt : stencil_irb->mt; - brw_depth_cache_add_bo(brw, stencil_mt->bo); - brw_miptree_finish_write(brw, stencil_mt, stencil_irb->mt_level, - stencil_irb->mt_layer, - stencil_irb->layer_count, ISL_AUX_USAGE_NONE); - } - - for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { - struct brw_renderbuffer *irb = - brw_renderbuffer(fb->_ColorDrawBuffers[i]); - - if (!irb) - continue; - - mesa_format mesa_format = - _mesa_get_render_format(ctx, brw_rb_format(irb)); - enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format); - enum isl_aux_usage aux_usage = brw->draw_aux_usage[i]; - - brw_render_cache_add_bo(brw, irb->mt->bo, isl_format, aux_usage); - - brw_miptree_finish_render(brw, irb->mt, irb->mt_level, - irb->mt_layer, irb->layer_count, - aux_usage); - } -} - -static void -brw_renderbuffer_move_temp_back(struct brw_context *brw, - struct brw_renderbuffer *irb) -{ - if (irb->align_wa_mt == NULL) - return; - - brw_cache_flush_for_read(brw, irb->align_wa_mt->bo); - - brw_miptree_copy_slice(brw, irb->align_wa_mt, 0, 0, - irb->mt, - irb->Base.Base.TexImage->Level, irb->mt_layer); - - brw_miptree_reference(&irb->align_wa_mt, NULL); - - /* Finally restore the x,y to correspond to full miptree. */ - brw_renderbuffer_set_draw_offset(irb); - - /* Make sure render surface state gets re-emitted with updated miptree. */ - brw->NewGLState |= _NEW_BUFFERS; -} - -static void -brw_postdraw_reconcile_align_wa_slices(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - struct gl_framebuffer *fb = ctx->DrawBuffer; - - struct brw_renderbuffer *depth_irb = - brw_get_renderbuffer(fb, BUFFER_DEPTH); - struct brw_renderbuffer *stencil_irb = - brw_get_renderbuffer(fb, BUFFER_STENCIL); - - if (depth_irb && depth_irb->align_wa_mt) - brw_renderbuffer_move_temp_back(brw, depth_irb); - - if (stencil_irb && stencil_irb->align_wa_mt) - brw_renderbuffer_move_temp_back(brw, stencil_irb); - - for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { - struct brw_renderbuffer *irb = - brw_renderbuffer(fb->_ColorDrawBuffers[i]); - - if (!irb || irb->align_wa_mt == NULL) - continue; - - brw_renderbuffer_move_temp_back(brw, irb); - } -} - -static void -brw_prepare_drawing(struct gl_context *ctx, - const struct _mesa_index_buffer *ib, - bool index_bounds_valid, - GLuint min_index, - GLuint max_index) -{ - struct brw_context *brw = brw_context(ctx); - - if (ctx->NewState) - _mesa_update_state(ctx); - - /* We have to validate the textures *before* checking for fallbacks; - * otherwise, the software fallback won't be able to rely on the - * texture state, the firstLevel and lastLevel fields won't be - * set in the intel texture object (they'll both be 0), and the - * software fallback will segfault if it attempts to access any - * texture level other than level 0. - */ - brw_validate_textures(brw); - - /* Find the highest sampler unit used by each shader program. A bit-count - * won't work since ARB programs use the texture unit number as the sampler - * index. - */ - brw->wm.base.sampler_count = - BITSET_LAST_BIT(ctx->FragmentProgram._Current->info.textures_used); - brw->gs.base.sampler_count = ctx->GeometryProgram._Current ? - BITSET_LAST_BIT(ctx->GeometryProgram._Current->info.textures_used) : 0; - brw->tes.base.sampler_count = ctx->TessEvalProgram._Current ? - BITSET_LAST_BIT(ctx->TessEvalProgram._Current->info.textures_used) : 0; - brw->tcs.base.sampler_count = ctx->TessCtrlProgram._Current ? 
- BITSET_LAST_BIT(ctx->TessCtrlProgram._Current->info.textures_used) : 0; - brw->vs.base.sampler_count = - BITSET_LAST_BIT(ctx->VertexProgram._Current->info.textures_used); - - brw_prepare_render(brw); - - /* This workaround has to happen outside of brw_upload_render_state() - * because it may flush the batchbuffer for a blit, affecting the state - * flags. - */ - brw_workaround_depthstencil_alignment(brw, 0); - - /* Resolves must occur after updating renderbuffers, updating context state, - * and finalizing textures but before setting up any hardware state for - * this draw call. - */ - bool draw_aux_buffer_disabled[MAX_DRAW_BUFFERS] = { }; - brw_predraw_resolve_inputs(brw, true, draw_aux_buffer_disabled); - brw_predraw_resolve_framebuffer(brw, draw_aux_buffer_disabled); - - /* Bind all inputs, derive varying and size information: - */ - brw_clear_buffers(brw); - brw_merge_inputs(brw); - - brw->ib.ib = ib; - brw->ctx.NewDriverState |= BRW_NEW_INDICES; - - brw->vb.index_bounds_valid = index_bounds_valid; - brw->vb.min_index = min_index; - brw->vb.max_index = max_index; - brw->ctx.NewDriverState |= BRW_NEW_VERTICES; -} - -static void -brw_finish_drawing(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - - if (brw->always_flush_batch) - brw_batch_flush(brw); - - brw_program_cache_check_size(brw); - brw_postdraw_reconcile_align_wa_slices(brw); - brw_postdraw_set_buffers_need_resolve(brw); - - if (brw->draw.draw_params_count_bo) { - brw_bo_unreference(brw->draw.draw_params_count_bo); - brw->draw.draw_params_count_bo = NULL; - } - - if (brw->draw.draw_params_bo) { - brw_bo_unreference(brw->draw.draw_params_bo); - brw->draw.draw_params_bo = NULL; - } - - if (brw->draw.derived_draw_params_bo) { - brw_bo_unreference(brw->draw.derived_draw_params_bo); - brw->draw.derived_draw_params_bo = NULL; - } -} - -/** - * Implement workarounds for preemption: - * - WaDisableMidObjectPreemptionForGSLineStripAdj - * - WaDisableMidObjectPreemptionForTrifanOrPolygon - * - WaDisableMidObjectPreemptionForLineLoop - * - WA#0798 - */ -static void -gfx9_emit_preempt_wa(struct brw_context *brw, - const struct _mesa_prim *prim, GLuint num_instances) -{ - bool object_preemption = true; - ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* Only apply these workarounds for gfx9 */ - assert(devinfo->ver == 9); - - /* WaDisableMidObjectPreemptionForGSLineStripAdj - * - * WA: Disable mid-draw preemption when draw-call is a linestrip_adj and - * GS is enabled. - */ - if (brw->primitive == _3DPRIM_LINESTRIP_ADJ && brw->gs.enabled) - object_preemption = false; - - /* WaDisableMidObjectPreemptionForTrifanOrPolygon - * - * TriFan miscompare in Execlist Preemption test. Cut index that is on a - * previous context. End the previous, the resume another context with a - * tri-fan or polygon, and the vertex count is corrupted. If we prempt - * again we will cause corruption. - * - * WA: Disable mid-draw preemption when draw-call has a tri-fan. - */ - if (brw->primitive == _3DPRIM_TRIFAN) - object_preemption = false; - - /* WaDisableMidObjectPreemptionForLineLoop - * - * VF Stats Counters Missing a vertex when preemption enabled. - * - * WA: Disable mid-draw preemption when the draw uses a lineloop - * topology. - */ - if (brw->primitive == _3DPRIM_LINELOOP) - object_preemption = false; - - /* WA#0798 - * - * VF is corrupting GAFS data when preempted on an instance boundary and - * replayed with instancing enabled. - * - * WA: Disable preemption when using instanceing. 
- */ - if (num_instances > 1) - object_preemption = false; - - brw_enable_obj_preemption(brw, object_preemption); -} - -/* May fail if out of video memory for texture or vbo upload, or on - * fallback conditions. - */ -static void -brw_draw_single_prim(struct gl_context *ctx, - const struct _mesa_prim *prim, - unsigned prim_id, - bool is_indexed, - GLuint num_instances, GLuint base_instance, - struct brw_transform_feedback_object *xfb_obj, - unsigned stream, - GLsizeiptr indirect_offset) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - bool fail_next; - bool is_indirect = brw->draw.draw_indirect_data != NULL; - - /* Flag BRW_NEW_DRAW_CALL on every draw. This allows us to have - * atoms that happen on every draw call. - */ - brw->ctx.NewDriverState |= BRW_NEW_DRAW_CALL; - - /* Flush the batch if the batch/state buffers are nearly full. We can - * grow them if needed, but this is not free, so we'd like to avoid it. - */ - brw_batch_require_space(brw, 1500); - brw_require_statebuffer_space(brw, 2400); - brw_batch_save_state(brw); - fail_next = brw_batch_saved_state_is_empty(brw); - - if (brw->num_instances != num_instances || - brw->basevertex != prim->basevertex || - brw->baseinstance != base_instance) { - brw->num_instances = num_instances; - brw->basevertex = prim->basevertex; - brw->baseinstance = base_instance; - if (prim_id > 0) { /* For i == 0 we just did this before the loop */ - brw->ctx.NewDriverState |= BRW_NEW_VERTICES; - brw_clear_buffers(brw); - } - } - - /* Determine if we need to flag BRW_NEW_VERTICES for updating the - * gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect draw, we - * always flag if the shader uses one of the values. For direct draws, - * we only flag if the values change. - */ - const int new_firstvertex = - is_indexed ? prim->basevertex : prim->start; - const int new_baseinstance = base_instance; - const struct brw_vs_prog_data *vs_prog_data = - brw_vs_prog_data(brw->vs.base.prog_data); - if (prim_id > 0) { - const bool uses_draw_parameters = - vs_prog_data->uses_firstvertex || - vs_prog_data->uses_baseinstance; - - if ((uses_draw_parameters && is_indirect) || - (vs_prog_data->uses_firstvertex && - brw->draw.params.firstvertex != new_firstvertex) || - (vs_prog_data->uses_baseinstance && - brw->draw.params.gl_baseinstance != new_baseinstance)) - brw->ctx.NewDriverState |= BRW_NEW_VERTICES; - } - - brw->draw.params.firstvertex = new_firstvertex; - brw->draw.params.gl_baseinstance = new_baseinstance; - brw_bo_unreference(brw->draw.draw_params_bo); - - if (is_indirect) { - /* Point draw_params_bo at the indirect buffer. */ - brw->draw.draw_params_bo = - brw_buffer_object(ctx->DrawIndirectBuffer)->buffer; - brw_bo_reference(brw->draw.draw_params_bo); - brw->draw.draw_params_offset = - indirect_offset + (is_indexed ? 12 : 8); - } else { - /* Set draw_params_bo to NULL so brw_prepare_vertices knows it - * has to upload gl_BaseVertex and such if they're needed. - */ - brw->draw.draw_params_bo = NULL; - brw->draw.draw_params_offset = 0; - } - - /* gl_DrawID always needs its own vertex buffer since it's not part of - * the indirect parameter buffer. Same for is_indexed_draw, which shares - * the buffer with gl_DrawID. If the program uses gl_DrawID, we need to - * flag BRW_NEW_VERTICES. For the first iteration, we don't have valid - * vs_prog_data, but we always flag BRW_NEW_VERTICES before the loop. 
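The four workarounds in gfx9_emit_preempt_wa() above collapse into a single predicate; object-level preemption stays enabled only when none of them fire (a standalone sketch with invented topology enum names):

#include <stdbool.h>

enum topology { LINESTRIP_ADJ, TRIFAN, LINELOOP, TRILIST };

static bool
allow_object_preemption(enum topology prim, bool gs_enabled,
                        unsigned num_instances)
{
   if (prim == LINESTRIP_ADJ && gs_enabled)
      return false;               /* WaDisableMidObjectPreemptionForGSLineStripAdj */
   if (prim == TRIFAN)
      return false;               /* WaDisableMidObjectPreemptionForTrifanOrPolygon */
   if (prim == LINELOOP)
      return false;               /* WaDisableMidObjectPreemptionForLineLoop */
   if (num_instances > 1)
      return false;               /* WA#0798: instancing */
   return true;
}

int main(void)
{
   /* Plain single-instance triangle list: preemption stays on. */
   return allow_object_preemption(TRILIST, false, 1) ? 0 : 1;
}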
- */ - if (prim_id > 0 && vs_prog_data->uses_drawid) - brw->ctx.NewDriverState |= BRW_NEW_VERTICES; - - brw->draw.derived_params.gl_drawid = prim->draw_id; - brw->draw.derived_params.is_indexed_draw = is_indexed ? ~0 : 0; - - brw_bo_unreference(brw->draw.derived_draw_params_bo); - brw->draw.derived_draw_params_bo = NULL; - brw->draw.derived_draw_params_offset = 0; - - if (devinfo->ver < 6) - brw_set_prim(brw, prim); - else - gfx6_set_prim(brw, prim); - -retry: - - /* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and - * that the state updated in the loop outside of this block is that in - * *_set_prim or brw_batch_flush(), which only impacts - * brw->ctx.NewDriverState. - */ - if (brw->ctx.NewDriverState) { - brw->batch.no_wrap = true; - brw_upload_render_state(brw); - } - - if (devinfo->ver == 9) - gfx9_emit_preempt_wa(brw, prim, num_instances); - - brw_emit_prim(brw, prim, brw->primitive, is_indexed, num_instances, - base_instance, xfb_obj, stream, is_indirect, - indirect_offset); - - brw->batch.no_wrap = false; - - if (!brw_batch_has_aperture_space(brw, 0)) { - if (!fail_next) { - brw_batch_reset_to_saved(brw); - brw_batch_flush(brw); - fail_next = true; - goto retry; - } else { - int ret = brw_batch_flush(brw); - WARN_ONCE(ret == -ENOSPC, - "i965: Single primitive emit exceeded " - "available aperture space\n"); - } - } - - /* Now that we know we haven't run out of aperture space, we can safely - * reset the dirty bits. - */ - if (brw->ctx.NewDriverState) - brw_render_state_finished(brw); - - return; -} - - - -void -brw_draw_prims(struct gl_context *ctx, - const struct _mesa_prim *prims, - unsigned nr_prims, - const struct _mesa_index_buffer *ib, - bool index_bounds_valid, - bool primitive_restart, - unsigned restart_index, - unsigned min_index, - unsigned max_index, - unsigned num_instances, - unsigned base_instance) -{ - unsigned i; - struct brw_context *brw = brw_context(ctx); - int predicate_state = brw->predicate.state; - - if (!brw_check_conditional_render(brw)) - return; - - /* Handle primitive restart if needed */ - if (brw_handle_primitive_restart(ctx, prims, nr_prims, ib, num_instances, - base_instance, primitive_restart, - restart_index)) { - /* The draw was handled, so we can exit now */ - return; - } - - /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it - * won't support all the extensions we support. - */ - if (ctx->RenderMode != GL_RENDER) { - perf_debug("%s render mode not supported in hardware\n", - _mesa_enum_to_string(ctx->RenderMode)); - _swsetup_Wakeup(ctx); - _tnl_wakeup(ctx); - _tnl_draw(ctx, prims, nr_prims, ib, index_bounds_valid, - primitive_restart, restart_index, min_index, - max_index, num_instances, base_instance); - return; - } - - /* If we're going to have to upload any of the user's vertex arrays, then - * get the minimum and maximum of their index buffer so we know what range - * to upload. - */ - if (!index_bounds_valid && _mesa_draw_user_array_bits(ctx) != 0) { - perf_debug("Scanning index buffer to compute index buffer bounds. " - "Use glDrawRangeElements() to avoid this.\n"); - vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims, - primitive_restart, restart_index); - index_bounds_valid = true; - } - - brw_prepare_drawing(ctx, ib, index_bounds_valid, min_index, max_index); - /* Try drawing with the hardware, but don't do anything else if we can't - * manage it. swrast doesn't support our featureset, so we can't fall back - * to it. 
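A CPU-side model of the MI_PREDICATE programming in the loop below: SRC0 holds the GPU-read draw count, SRC1 the current draw id, and the LOADINV/SET followed by LOAD/XOR sequence makes draw i execute exactly when i < count (a sketch that ignores the BRW_PREDICATE_STATE_USE_BIT special case):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool predicate; /* models the GPU's predicate bit */

static void
run_draw(uint32_t draw_id, uint32_t count)
{
   bool srcs_equal = (count == draw_id); /* MI_PREDICATE_COMPAREOP_SRCS_EQUAL */
   if (draw_id == 0)
      predicate = !srcs_equal;           /* LOADINV + COMBINEOP_SET           */
   else
      predicate ^= srcs_equal;           /* LOAD + COMBINEOP_XOR              */
}

int main(void)
{
   const uint32_t count = 3;             /* draw count read from the buffer */
   for (uint32_t i = 0; i < 8; i++) {    /* 8 draws submitted by the app    */
      run_draw(i, count);
      assert(predicate == (i < count));  /* only draws 0..2 execute         */
   }
   return 0;
}

The XOR is what makes the chain self-terminating: the predicate flips to false on the draw whose id equals the count, and every later comparison is false, so it stays false.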
- */ - - for (i = 0; i < nr_prims; i++) { - /* Implementation of ARB_indirect_parameters via predicates */ - if (brw->draw.draw_params_count_bo) { - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE); - - /* Upload the current draw count from the draw parameters buffer to - * MI_PREDICATE_SRC0. - */ - brw_load_register_mem(brw, MI_PREDICATE_SRC0, - brw->draw.draw_params_count_bo, - brw->draw.draw_params_count_offset); - /* Zero the top 32-bits of MI_PREDICATE_SRC0 */ - brw_load_register_imm32(brw, MI_PREDICATE_SRC0 + 4, 0); - /* Upload the id of the current primitive to MI_PREDICATE_SRC1. */ - brw_load_register_imm64(brw, MI_PREDICATE_SRC1, prims[i].draw_id); - - BEGIN_BATCH(1); - if (i == 0 && brw->predicate.state != BRW_PREDICATE_STATE_USE_BIT) { - OUT_BATCH(GFX7_MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV | - MI_PREDICATE_COMBINEOP_SET | - MI_PREDICATE_COMPAREOP_SRCS_EQUAL); - } else { - OUT_BATCH(GFX7_MI_PREDICATE | - MI_PREDICATE_LOADOP_LOAD | MI_PREDICATE_COMBINEOP_XOR | - MI_PREDICATE_COMPAREOP_SRCS_EQUAL); - } - ADVANCE_BATCH(); - - brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT; - } - - brw_draw_single_prim(ctx, &prims[i], i, ib != NULL, num_instances, - base_instance, NULL, 0, - brw->draw.draw_indirect_offset + - brw->draw.draw_indirect_stride * i); - } - - brw_finish_drawing(ctx); - brw->predicate.state = predicate_state; -} - -static void -brw_draw_transform_feedback(struct gl_context *ctx, GLenum mode, - unsigned num_instances, unsigned stream, - struct gl_transform_feedback_object *gl_xfb_obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *xfb_obj = - (struct brw_transform_feedback_object *) gl_xfb_obj; - - if (!brw_check_conditional_render(brw)) - return; - - /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it - * won't support all the extensions we support. - */ - if (ctx->RenderMode != GL_RENDER) { - perf_debug("%s render mode not supported in hardware\n", - _mesa_enum_to_string(ctx->RenderMode)); - /* swrast doesn't support DrawTransformFeedback. Nothing to do. */ - return; - } - - brw_prepare_drawing(ctx, NULL, false, 0, ~0); - - struct _mesa_prim prim; - memset(&prim, 0, sizeof(prim)); - prim.begin = 1; - prim.end = 1; - prim.mode = mode; - - /* Try drawing with the hardware, but don't do anything else if we can't - * manage it. swrast doesn't support our featureset, so we can't fall back - * to it. - */ - brw_draw_single_prim(ctx, &prim, 0, false, num_instances, 0, xfb_obj, - stream, 0); - brw_finish_drawing(ctx); -} - -void -brw_draw_indirect_prims(struct gl_context *ctx, - GLuint mode, - struct gl_buffer_object *indirect_data, - GLsizeiptr indirect_offset, - unsigned draw_count, - unsigned stride, - struct gl_buffer_object *indirect_params, - GLsizeiptr indirect_params_offset, - const struct _mesa_index_buffer *ib, - bool primitive_restart, - unsigned restart_index) -{ - struct brw_context *brw = brw_context(ctx); - struct _mesa_prim *prim; - GLsizei i; - - prim = calloc(draw_count, sizeof(*prim)); - if (prim == NULL) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sDraw%sIndirect%s", - (draw_count > 1) ? "Multi" : "", - ib ? "Elements" : "Arrays", - indirect_params ? 
"CountARB" : ""); - return; - } - - brw->draw.draw_indirect_stride = stride; - brw->draw.draw_indirect_offset = indirect_offset; - - prim[0].begin = 1; - prim[draw_count - 1].end = 1; - for (i = 0; i < draw_count; ++i) { - prim[i].mode = mode; - prim[i].draw_id = i; - } - - if (indirect_params) { - brw->draw.draw_params_count_bo = - brw_buffer_object(indirect_params)->buffer; - brw_bo_reference(brw->draw.draw_params_count_bo); - brw->draw.draw_params_count_offset = indirect_params_offset; - } - - brw->draw.draw_indirect_data = indirect_data; - - brw_draw_prims(ctx, prim, draw_count, ib, false, primitive_restart, - restart_index, 0, ~0, 0, 0); - - brw->draw.draw_indirect_data = NULL; - free(prim); -} - -void -brw_init_draw_functions(struct dd_function_table *functions) -{ - /* Register our drawing function: - */ - functions->Draw = brw_draw_prims; - functions->DrawTransformFeedback = brw_draw_transform_feedback; - functions->DrawIndirect = brw_draw_indirect_prims; -} - -void -brw_draw_init(struct brw_context *brw) -{ - for (int i = 0; i < VERT_ATTRIB_MAX; i++) - brw->vb.inputs[i].buffer = -1; - brw->vb.nr_buffers = 0; - brw->vb.nr_enabled = 0; -} - -void -brw_draw_destroy(struct brw_context *brw) -{ - unsigned i; - - for (i = 0; i < brw->vb.nr_buffers; i++) { - brw_bo_unreference(brw->vb.buffers[i].bo); - brw->vb.buffers[i].bo = NULL; - } - brw->vb.nr_buffers = 0; - - for (i = 0; i < brw->vb.nr_enabled; i++) { - brw->vb.enabled[i]->buffer = -1; - } - brw->vb.nr_enabled = 0; - - brw_bo_unreference(brw->ib.bo); - brw->ib.bo = NULL; -} diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h deleted file mode 100644 index d9ab2f3..0000000 --- a/src/mesa/drivers/dri/i965/brw_draw.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright 2005 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef BRW_DRAW_H -#define BRW_DRAW_H - -#include "main/mtypes.h" -#include "brw_bufmgr.h" - -struct brw_context; - -uint32_t * -brw_emit_vertex_buffer_state(struct brw_context *brw, - unsigned buffer_nr, - struct brw_bo *bo, - unsigned start_offset, - unsigned end_offset, - unsigned stride, - unsigned step_rate, - uint32_t *__map); - -#define EMIT_VERTEX_BUFFER_STATE(...) 
__map = \ - brw_emit_vertex_buffer_state(__VA_ARGS__, __map) - -void brw_draw_prims(struct gl_context *ctx, - const struct _mesa_prim *prims, - unsigned nr_prims, - const struct _mesa_index_buffer *ib, - bool index_bounds_valid, - bool primitive_restart, - unsigned restart_index, - unsigned min_index, - unsigned max_index, - unsigned num_instances, - unsigned base_instance); - -void brw_init_draw_functions(struct dd_function_table *functions); -void brw_draw_init( struct brw_context *brw ); -void brw_draw_destroy( struct brw_context *brw ); - -void brw_prepare_shader_draw_parameters(struct brw_context *); - -/* brw_primitive_restart.c */ -GLboolean -brw_handle_primitive_restart(struct gl_context *ctx, - const struct _mesa_prim *prims, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLuint num_instances, GLuint base_instance, - bool primitive_restart, - unsigned restart_index); - -void -brw_draw_indirect_prims(struct gl_context *ctx, - GLuint mode, - struct gl_buffer_object *indirect_data, - GLsizeiptr indirect_offset, - unsigned draw_count, - unsigned stride, - struct gl_buffer_object *indirect_params, - GLsizeiptr indirect_params_offset, - const struct _mesa_index_buffer *ib, - bool primitive_restart, - unsigned restart_index); -#endif diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c deleted file mode 100644 index 656159e..0000000 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ /dev/null @@ -1,801 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "main/arrayobj.h" -#include "main/bufferobj.h" -#include "main/context.h" -#include "main/enums.h" -#include "main/macros.h" -#include "main/glformats.h" -#include "nir.h" - -#include "brw_draw.h" -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_state.h" - -#include "brw_batch.h" -#include "brw_buffer_objects.h" - -static const GLuint double_types_float[5] = { - 0, - ISL_FORMAT_R64_FLOAT, - ISL_FORMAT_R64G64_FLOAT, - ISL_FORMAT_R64G64B64_FLOAT, - ISL_FORMAT_R64G64B64A64_FLOAT -}; - -static const GLuint double_types_passthru[5] = { - 0, - ISL_FORMAT_R64_PASSTHRU, - ISL_FORMAT_R64G64_PASSTHRU, - ISL_FORMAT_R64G64B64_PASSTHRU, - ISL_FORMAT_R64G64B64A64_PASSTHRU -}; - -static const GLuint float_types[5] = { - 0, - ISL_FORMAT_R32_FLOAT, - ISL_FORMAT_R32G32_FLOAT, - ISL_FORMAT_R32G32B32_FLOAT, - ISL_FORMAT_R32G32B32A32_FLOAT -}; - -static const GLuint half_float_types[5] = { - 0, - ISL_FORMAT_R16_FLOAT, - ISL_FORMAT_R16G16_FLOAT, - ISL_FORMAT_R16G16B16_FLOAT, - ISL_FORMAT_R16G16B16A16_FLOAT -}; - -static const GLuint fixed_point_types[5] = { - 0, - ISL_FORMAT_R32_SFIXED, - ISL_FORMAT_R32G32_SFIXED, - ISL_FORMAT_R32G32B32_SFIXED, - ISL_FORMAT_R32G32B32A32_SFIXED, -}; - -static const GLuint uint_types_direct[5] = { - 0, - ISL_FORMAT_R32_UINT, - ISL_FORMAT_R32G32_UINT, - ISL_FORMAT_R32G32B32_UINT, - ISL_FORMAT_R32G32B32A32_UINT -}; - -static const GLuint uint_types_norm[5] = { - 0, - ISL_FORMAT_R32_UNORM, - ISL_FORMAT_R32G32_UNORM, - ISL_FORMAT_R32G32B32_UNORM, - ISL_FORMAT_R32G32B32A32_UNORM -}; - -static const GLuint uint_types_scale[5] = { - 0, - ISL_FORMAT_R32_USCALED, - ISL_FORMAT_R32G32_USCALED, - ISL_FORMAT_R32G32B32_USCALED, - ISL_FORMAT_R32G32B32A32_USCALED -}; - -static const GLuint int_types_direct[5] = { - 0, - ISL_FORMAT_R32_SINT, - ISL_FORMAT_R32G32_SINT, - ISL_FORMAT_R32G32B32_SINT, - ISL_FORMAT_R32G32B32A32_SINT -}; - -static const GLuint int_types_norm[5] = { - 0, - ISL_FORMAT_R32_SNORM, - ISL_FORMAT_R32G32_SNORM, - ISL_FORMAT_R32G32B32_SNORM, - ISL_FORMAT_R32G32B32A32_SNORM -}; - -static const GLuint int_types_scale[5] = { - 0, - ISL_FORMAT_R32_SSCALED, - ISL_FORMAT_R32G32_SSCALED, - ISL_FORMAT_R32G32B32_SSCALED, - ISL_FORMAT_R32G32B32A32_SSCALED -}; - -static const GLuint ushort_types_direct[5] = { - 0, - ISL_FORMAT_R16_UINT, - ISL_FORMAT_R16G16_UINT, - ISL_FORMAT_R16G16B16_UINT, - ISL_FORMAT_R16G16B16A16_UINT -}; - -static const GLuint ushort_types_norm[5] = { - 0, - ISL_FORMAT_R16_UNORM, - ISL_FORMAT_R16G16_UNORM, - ISL_FORMAT_R16G16B16_UNORM, - ISL_FORMAT_R16G16B16A16_UNORM -}; - -static const GLuint ushort_types_scale[5] = { - 0, - ISL_FORMAT_R16_USCALED, - ISL_FORMAT_R16G16_USCALED, - ISL_FORMAT_R16G16B16_USCALED, - ISL_FORMAT_R16G16B16A16_USCALED -}; - -static const GLuint short_types_direct[5] = { - 0, - ISL_FORMAT_R16_SINT, - ISL_FORMAT_R16G16_SINT, - ISL_FORMAT_R16G16B16_SINT, - ISL_FORMAT_R16G16B16A16_SINT -}; - -static const GLuint short_types_norm[5] = { - 0, - ISL_FORMAT_R16_SNORM, - ISL_FORMAT_R16G16_SNORM, - ISL_FORMAT_R16G16B16_SNORM, - ISL_FORMAT_R16G16B16A16_SNORM -}; - -static const GLuint short_types_scale[5] = { - 0, - ISL_FORMAT_R16_SSCALED, - ISL_FORMAT_R16G16_SSCALED, - ISL_FORMAT_R16G16B16_SSCALED, - ISL_FORMAT_R16G16B16A16_SSCALED -}; - -static const GLuint ubyte_types_direct[5] = { - 0, - ISL_FORMAT_R8_UINT, - ISL_FORMAT_R8G8_UINT, - ISL_FORMAT_R8G8B8_UINT, - ISL_FORMAT_R8G8B8A8_UINT -}; - -static const GLuint ubyte_types_norm[5] = { - 0, - ISL_FORMAT_R8_UNORM, - ISL_FORMAT_R8G8_UNORM, - ISL_FORMAT_R8G8B8_UNORM, - 
ISL_FORMAT_R8G8B8A8_UNORM -}; - -static const GLuint ubyte_types_scale[5] = { - 0, - ISL_FORMAT_R8_USCALED, - ISL_FORMAT_R8G8_USCALED, - ISL_FORMAT_R8G8B8_USCALED, - ISL_FORMAT_R8G8B8A8_USCALED -}; - -static const GLuint byte_types_direct[5] = { - 0, - ISL_FORMAT_R8_SINT, - ISL_FORMAT_R8G8_SINT, - ISL_FORMAT_R8G8B8_SINT, - ISL_FORMAT_R8G8B8A8_SINT -}; - -static const GLuint byte_types_norm[5] = { - 0, - ISL_FORMAT_R8_SNORM, - ISL_FORMAT_R8G8_SNORM, - ISL_FORMAT_R8G8B8_SNORM, - ISL_FORMAT_R8G8B8A8_SNORM -}; - -static const GLuint byte_types_scale[5] = { - 0, - ISL_FORMAT_R8_SSCALED, - ISL_FORMAT_R8G8_SSCALED, - ISL_FORMAT_R8G8B8_SSCALED, - ISL_FORMAT_R8G8B8A8_SSCALED -}; - -static GLuint -double_types(int size, GLboolean doubles) -{ - /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE): - * "When SourceElementFormat is set to one of the *64*_PASSTHRU formats, - * 64-bit components are stored in the URB without any conversion." - * Also included on BDW PRM, Volume 7, page 470, table "Source Element - * Formats Supported in VF Unit" - * - * Previous PRMs don't include those references, so for gfx7 we can't use - * PASSTHRU formats directly. But in any case, we prefer to return passthru - * even in that case, because that reflects what we want to achieve, even - * if we would need to workaround on gen < 8. - */ - return (doubles - ? double_types_passthru[size] - : double_types_float[size]); -} - -/** - * Given vertex array type/size/format/normalized info, return - * the appopriate hardware surface type. - * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays. - */ -unsigned -brw_get_vertex_surface_type(struct brw_context *brw, - const struct gl_vertex_format *glformat) -{ - int size = glformat->Size; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const bool is_ivybridge_or_older = - devinfo->verx10 < 70 || devinfo->platform == INTEL_PLATFORM_IVB; - - if (INTEL_DEBUG(DEBUG_VERTS)) - fprintf(stderr, "type %s size %d normalized %d\n", - _mesa_enum_to_string(glformat->Type), - glformat->Size, glformat->Normalized); - - if (glformat->Integer) { - assert(glformat->Format == GL_RGBA); /* sanity check */ - switch (glformat->Type) { - case GL_INT: return int_types_direct[size]; - case GL_SHORT: - if (is_ivybridge_or_older && size == 3) - return short_types_direct[4]; - else - return short_types_direct[size]; - case GL_BYTE: - if (is_ivybridge_or_older && size == 3) - return byte_types_direct[4]; - else - return byte_types_direct[size]; - case GL_UNSIGNED_INT: return uint_types_direct[size]; - case GL_UNSIGNED_SHORT: - if (is_ivybridge_or_older && size == 3) - return ushort_types_direct[4]; - else - return ushort_types_direct[size]; - case GL_UNSIGNED_BYTE: - if (is_ivybridge_or_older && size == 3) - return ubyte_types_direct[4]; - else - return ubyte_types_direct[size]; - default: unreachable("not reached"); - } - } else if (glformat->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) { - return ISL_FORMAT_R11G11B10_FLOAT; - } else if (glformat->Normalized) { - switch (glformat->Type) { - case GL_DOUBLE: return double_types(size, glformat->Doubles); - case GL_FLOAT: return float_types[size]; - case GL_HALF_FLOAT: - case GL_HALF_FLOAT_OES: - if (devinfo->ver < 6 && size == 3) - return half_float_types[4]; - else - return half_float_types[size]; - case GL_INT: return int_types_norm[size]; - case GL_SHORT: return short_types_norm[size]; - case GL_BYTE: return byte_types_norm[size]; - case GL_UNSIGNED_INT: return uint_types_norm[size]; - case GL_UNSIGNED_SHORT: 
return ushort_types_norm[size]; - case GL_UNSIGNED_BYTE: - if (glformat->Format == GL_BGRA) { - /* See GL_EXT_vertex_array_bgra */ - assert(size == 4); - return ISL_FORMAT_B8G8R8A8_UNORM; - } - else { - return ubyte_types_norm[size]; - } - case GL_FIXED: - if (devinfo->verx10 >= 75) - return fixed_point_types[size]; - - /* This produces GL_FIXED inputs as values between INT32_MIN and - * INT32_MAX, which will be scaled down by 1/65536 by the VS. - */ - return int_types_scale[size]; - /* See GL_ARB_vertex_type_2_10_10_10_rev. - * W/A: Pre-Haswell, the hardware doesn't really support the formats we'd - * like to use here, so upload everything as UINT and fix - * it in the shader - */ - case GL_INT_2_10_10_10_REV: - assert(size == 4); - if (devinfo->verx10 >= 75) { - return glformat->Format == GL_BGRA - ? ISL_FORMAT_B10G10R10A2_SNORM - : ISL_FORMAT_R10G10B10A2_SNORM; - } - return ISL_FORMAT_R10G10B10A2_UINT; - case GL_UNSIGNED_INT_2_10_10_10_REV: - assert(size == 4); - if (devinfo->verx10 >= 75) { - return glformat->Format == GL_BGRA - ? ISL_FORMAT_B10G10R10A2_UNORM - : ISL_FORMAT_R10G10B10A2_UNORM; - } - return ISL_FORMAT_R10G10B10A2_UINT; - default: unreachable("not reached"); - } - } - else { - /* See GL_ARB_vertex_type_2_10_10_10_rev. - * W/A: the hardware doesn't really support the formats we'd - * like to use here, so upload everything as UINT and fix - * it in the shader - */ - if (glformat->Type == GL_INT_2_10_10_10_REV) { - assert(size == 4); - if (devinfo->verx10 >= 75) { - return glformat->Format == GL_BGRA - ? ISL_FORMAT_B10G10R10A2_SSCALED - : ISL_FORMAT_R10G10B10A2_SSCALED; - } - return ISL_FORMAT_R10G10B10A2_UINT; - } else if (glformat->Type == GL_UNSIGNED_INT_2_10_10_10_REV) { - assert(size == 4); - if (devinfo->verx10 >= 75) { - return glformat->Format == GL_BGRA - ? ISL_FORMAT_B10G10R10A2_USCALED - : ISL_FORMAT_R10G10B10A2_USCALED; - } - return ISL_FORMAT_R10G10B10A2_UINT; - } - assert(glformat->Format == GL_RGBA); /* sanity check */ - switch (glformat->Type) { - case GL_DOUBLE: return double_types(size, glformat->Doubles); - case GL_FLOAT: return float_types[size]; - case GL_HALF_FLOAT: - case GL_HALF_FLOAT_OES: - if (devinfo->ver < 6 && size == 3) - return half_float_types[4]; - else - return half_float_types[size]; - case GL_INT: return int_types_scale[size]; - case GL_SHORT: return short_types_scale[size]; - case GL_BYTE: return byte_types_scale[size]; - case GL_UNSIGNED_INT: return uint_types_scale[size]; - case GL_UNSIGNED_SHORT: return ushort_types_scale[size]; - case GL_UNSIGNED_BYTE: return ubyte_types_scale[size]; - case GL_FIXED: - if (devinfo->verx10 >= 75) - return fixed_point_types[size]; - - /* This produces GL_FIXED inputs as values between INT32_MIN and - * INT32_MAX, which will be scaled down by 1/65536 by the VS. - */ - return int_types_scale[size]; - default: unreachable("not reached"); - } - } -} - -static void -copy_array_to_vbo_array(struct brw_context *brw, - const uint8_t *const ptr, const int src_stride, - int min, int max, - struct brw_vertex_buffer *buffer, - GLuint dst_stride) -{ - const unsigned char *src = ptr + min * src_stride; - int count = max - min + 1; - GLuint size = count * dst_stride; - uint8_t *dst = brw_upload_space(&brw->upload, size, dst_stride, - &buffer->bo, &buffer->offset); - - /* The GL 4.5 spec says: - * "If any enabled array’s buffer binding is zero when DrawArrays or - * one of the other drawing commands defined in section 10.4 is called, - * the result is undefined." 
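A worked example of the pre-Haswell GL_FIXED fallback described above: the raw 16.16 fixed-point bits reach the VS as plain integers, and the shader recovers the real value by multiplying with 1/65536 (both results below are exact because the divisor is a power of two):

#include <assert.h>
#include <stdint.h>

int main(void)
{
   int32_t fixed_one = 1 << 16;          /*  1.0 in 16.16 fixed point */
   int32_t fixed_neg = -(3 << 16) / 2;   /* -1.5 in 16.16 fixed point */
   float   vs_scale  = 1.0f / 65536.0f;  /* the scale applied in the VS */

   assert(fixed_one * vs_scale == 1.0f);
   assert(fixed_neg * vs_scale == -1.5f);
   return 0;
}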
- * - * In this case, let's the dst with undefined values - */ - if (ptr != NULL) { - if (dst_stride == src_stride) { - memcpy(dst, src, size); - } else { - while (count--) { - memcpy(dst, src, dst_stride); - src += src_stride; - dst += dst_stride; - } - } - } - buffer->stride = dst_stride; - buffer->size = size; -} - -void -brw_prepare_vertices(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_VERTEX_PROGRAM */ - const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX]; - /* BRW_NEW_VS_PROG_DATA */ - const struct brw_vs_prog_data *vs_prog_data = - brw_vs_prog_data(brw->vs.base.prog_data); - const uint64_t vs_inputs64 = - nir_get_single_slot_attribs_mask(vs_prog_data->inputs_read, - vp->DualSlotInputs); - assert((vs_inputs64 & ~(uint64_t)VERT_BIT_ALL) == 0); - unsigned vs_inputs = (unsigned)vs_inputs64; - unsigned int min_index = brw->vb.min_index + brw->basevertex; - unsigned int max_index = brw->vb.max_index + brw->basevertex; - int delta, j; - - /* _NEW_POLYGON - * - * On gfx6+, edge flags don't end up in the VUE (either in or out of the - * VS). Instead, they're uploaded as the last vertex element, and the data - * is passed sideband through the fixed function units. So, we need to - * prepare the vertex buffer for it, but it's not present in inputs_read. - */ - if (devinfo->ver >= 6 && (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL)) { - vs_inputs |= VERT_BIT_EDGEFLAG; - } - - if (0) - fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index); - - /* Accumulate the list of enabled arrays. */ - brw->vb.nr_enabled = 0; - - unsigned mask = vs_inputs; - while (mask) { - const gl_vert_attrib attr = u_bit_scan(&mask); - struct brw_vertex_element *input = &brw->vb.inputs[attr]; - brw->vb.enabled[brw->vb.nr_enabled++] = input; - } - assert(brw->vb.nr_enabled <= VERT_ATTRIB_MAX); - - if (brw->vb.nr_enabled == 0) - return; - - if (brw->vb.nr_buffers) - return; - - j = 0; - const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO; - - unsigned vbomask = vs_inputs & _mesa_draw_vbo_array_bits(ctx); - while (vbomask) { - const struct gl_vertex_buffer_binding *const glbinding = - _mesa_draw_buffer_binding(vao, ffs(vbomask) - 1); - const GLsizei stride = glbinding->Stride; - - assert(glbinding->BufferObj); - - /* Accumulate the range of a single vertex, start with inverted range */ - uint32_t vertex_range_start = ~(uint32_t)0; - uint32_t vertex_range_end = 0; - - const unsigned boundmask = _mesa_draw_bound_attrib_bits(glbinding); - unsigned attrmask = vbomask & boundmask; - /* Mark the those attributes as processed */ - vbomask ^= attrmask; - /* We can assume that we have an array for the binding */ - assert(attrmask); - /* Walk attributes belonging to the binding */ - while (attrmask) { - const gl_vert_attrib attr = u_bit_scan(&attrmask); - const struct gl_array_attributes *const glattrib = - _mesa_draw_array_attrib(vao, attr); - const uint32_t rel_offset = - _mesa_draw_attributes_relative_offset(glattrib); - const uint32_t rel_end = rel_offset + glattrib->Format._ElementSize; - - vertex_range_start = MIN2(vertex_range_start, rel_offset); - vertex_range_end = MAX2(vertex_range_end, rel_end); - - struct brw_vertex_element *input = &brw->vb.inputs[attr]; - input->glformat = &glattrib->Format; - input->buffer = j; - input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0; - input->offset = rel_offset; - } - assert(vertex_range_start <= 
vertex_range_end); - - struct brw_buffer_object *intel_buffer = - brw_buffer_object(glbinding->BufferObj); - struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; - - const uint32_t offset = _mesa_draw_binding_offset(glbinding); - - /* If nothing else is known take the buffer size and offset as a bound */ - uint32_t start = vertex_range_start; - uint32_t range = intel_buffer->Base.Size - offset - vertex_range_start; - /* Check if we can get a more narrow range */ - if (glbinding->InstanceDivisor) { - if (brw->num_instances) { - const uint32_t vertex_size = vertex_range_end - vertex_range_start; - start = vertex_range_start + stride * brw->baseinstance; - range = (stride * ((brw->num_instances - 1) / - glbinding->InstanceDivisor) + - vertex_size); - } - } else { - if (brw->vb.index_bounds_valid) { - const uint32_t vertex_size = vertex_range_end - vertex_range_start; - start = vertex_range_start + stride * min_index; - range = (stride * (max_index - min_index) + - vertex_size); - - /** - * Unreal Engine 4 has a bug in usage of glDrawRangeElements, - * causing it to be called with a number of vertices in place - * of "end" parameter (which specifies the maximum array index - * contained in indices). - * - * Since there is unknown amount of games affected and we - * could not identify that a game is built with UE4 - we are - * forced to make a blanket workaround, disregarding max_index - * in range calculations. Fortunately all such calls look like: - * glDrawRangeElements(GL_TRIANGLES, 0, 3, 3, ...); - * So we are able to narrow down this workaround. - * - * See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2917 - */ - if (unlikely(max_index == 3 && min_index == 0 && - brw->draw.derived_params.is_indexed_draw)) { - range = intel_buffer->Base.Size - offset - start; - } - } - } - - buffer->offset = offset; - buffer->size = start + range; - buffer->stride = stride; - buffer->step_rate = glbinding->InstanceDivisor; - - buffer->bo = brw_bufferobj_buffer(brw, intel_buffer, offset + start, - range, false); - brw_bo_reference(buffer->bo); - - j++; - } - - /* If we need to upload all the arrays, then we can trim those arrays to - * only the used elements [min_index, max_index] so long as we adjust all - * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias. 
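A worked example of the trim-and-bias step described in the comment above and applied in the code that follows: when every enabled array is a user-space array, only elements [min_index, max_index] are uploaded, and start_vertex_bias rebases vertex fetch back to element 0 (sample numbers only):

#include <stdio.h>

int main(void)
{
   unsigned min_index = 100, max_index = 163; /* from the index buffer */
   int start_vertex_bias = 0;
   int delta = (int)min_index;

   int all_arrays_are_user_arrays = 1;        /* assumed for the example */
   if (all_arrays_are_user_arrays) {
      start_vertex_bias = -delta;             /* rebase fetch to element 0      */
      delta = 0;                              /* arrays uploaded from min_index */
   }

   /* 64 vertices are uploaded instead of 164: */
   printf("upload %u vertices, bias %d\n",
          max_index - min_index + 1, start_vertex_bias);
   return 0;
}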
-   /* If we need to upload all the arrays, then we can trim those arrays to
-    * only the used elements [min_index, max_index] so long as we adjust all
-    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
-    */
-   brw->vb.start_vertex_bias = 0;
-   delta = min_index;
-   if ((vs_inputs & _mesa_draw_vbo_array_bits(ctx)) == 0) {
-      brw->vb.start_vertex_bias = -delta;
-      delta = 0;
-   }
-
-   unsigned usermask = vs_inputs & _mesa_draw_user_array_bits(ctx);
-   while (usermask) {
-      const struct gl_vertex_buffer_binding *const glbinding =
-         _mesa_draw_buffer_binding(vao, ffs(usermask) - 1);
-      const GLsizei stride = glbinding->Stride;
-
-      assert(!glbinding->BufferObj);
-      assert(brw->vb.index_bounds_valid);
-
-      /* Accumulate the range of a single vertex, start with inverted range */
-      uint32_t vertex_range_start = ~(uint32_t)0;
-      uint32_t vertex_range_end = 0;
-
-      const unsigned boundmask = _mesa_draw_bound_attrib_bits(glbinding);
-      unsigned attrmask = usermask & boundmask;
-      /* Mark those attributes as processed */
-      usermask ^= attrmask;
-      /* We can assume that we have an array for the binding */
-      assert(attrmask);
-      /* Walk attributes belonging to the binding */
-      while (attrmask) {
-         const gl_vert_attrib attr = u_bit_scan(&attrmask);
-         const struct gl_array_attributes *const glattrib =
-            _mesa_draw_array_attrib(vao, attr);
-         const uint32_t rel_offset =
-            _mesa_draw_attributes_relative_offset(glattrib);
-         const uint32_t rel_end = rel_offset + glattrib->Format._ElementSize;
-
-         vertex_range_start = MIN2(vertex_range_start, rel_offset);
-         vertex_range_end = MAX2(vertex_range_end, rel_end);
-
-         struct brw_vertex_element *input = &brw->vb.inputs[attr];
-         input->glformat = &glattrib->Format;
-         input->buffer = j;
-         input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0;
-         input->offset = rel_offset;
-      }
-      assert(vertex_range_start <= vertex_range_end);
-
-      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
-
-      const uint8_t *ptr = (const uint8_t*)_mesa_draw_binding_offset(glbinding);
-      ptr += vertex_range_start;
-      const uint32_t vertex_size = vertex_range_end - vertex_range_start;
-      if (glbinding->Stride == 0) {
-         /* If the source stride is zero, we just want to upload the current
-          * attribute once and set the buffer's stride to 0. There's no need
-          * to replicate it out.
-          */
-         copy_array_to_vbo_array(brw, ptr, 0, 0, 0, buffer, vertex_size);
-      } else if (glbinding->InstanceDivisor == 0) {
-         copy_array_to_vbo_array(brw, ptr, stride, min_index,
-                                 max_index, buffer, vertex_size);
-      } else {
-         /* This is an instanced attribute, since its InstanceDivisor
-          * is not zero. Therefore, its data will be stepped after the
-          * instanced draw has been run InstanceDivisor times.
-          */
-         uint32_t instanced_attr_max_index =
-            (brw->num_instances - 1) / glbinding->InstanceDivisor;
-         copy_array_to_vbo_array(brw, ptr, stride, 0,
-                                 instanced_attr_max_index, buffer, vertex_size);
-      }
-      buffer->offset -= delta * buffer->stride + vertex_range_start;
-      buffer->size += delta * buffer->stride + vertex_range_start;
-      buffer->step_rate = glbinding->InstanceDivisor;
-
-      j++;
-   }
-
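[Annotation: in the instanced branch above, the attribute only advances once every InstanceDivisor instances, so the upload can stop at element (num_instances - 1) / divisor instead of replicating data per instance. A small sketch of that step count; the helper name is hypothetical, for illustration only.]

   #include <assert.h>
   #include <stdint.h>

   static uint32_t
   instanced_attr_value_count(uint32_t num_instances, uint32_t divisor)
   {
      assert(divisor > 0); /* divisor == 0 would mean per-vertex stepping */
      /* Instance i reads element i / divisor, so the last instance reads
       * element (num_instances - 1) / divisor; the count is that index
       * plus one.
       */
      return (num_instances - 1) / divisor + 1;
   }

For example, 10 instances with a divisor of 4 read elements 0, 1 and 2, so only three values need to be uploaded.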
-   /* Upload the current values */
-   unsigned curmask = vs_inputs & _mesa_draw_current_bits(ctx);
-   if (curmask) {
-      /* For each attribute, upload the maximum possible size. */
-      uint8_t data[VERT_ATTRIB_MAX * sizeof(GLdouble) * 4];
-      uint8_t *cursor = data;
-
-      do {
-         const gl_vert_attrib attr = u_bit_scan(&curmask);
-         const struct gl_array_attributes *const glattrib =
-            _mesa_draw_current_attrib(ctx, attr);
-         const unsigned size = glattrib->Format._ElementSize;
-         const unsigned alignment = align(size, sizeof(GLdouble));
-         memcpy(cursor, glattrib->Ptr, size);
-         if (alignment != size)
-            memset(cursor + size, 0, alignment - size);
-
-         struct brw_vertex_element *input = &brw->vb.inputs[attr];
-         input->glformat = &glattrib->Format;
-         input->buffer = j;
-         input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0;
-         input->offset = cursor - data;
-
-         cursor += alignment;
-      } while (curmask);
-
-      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
-      const unsigned size = cursor - data;
-      brw_upload_data(&brw->upload, data, size, size,
-                      &buffer->bo, &buffer->offset);
-      buffer->stride = 0;
-      buffer->size = size;
-      buffer->step_rate = 0;
-
-      j++;
-   }
-   brw->vb.nr_buffers = j;
-}
-
-void
-brw_prepare_shader_draw_parameters(struct brw_context *brw)
-{
-   const struct brw_vs_prog_data *vs_prog_data =
-      brw_vs_prog_data(brw->vs.base.prog_data);
-
-   /* For non-indirect draws, upload the shader draw parameters */
-   if ((vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) &&
-       brw->draw.draw_params_bo == NULL) {
-      brw_upload_data(&brw->upload,
-                      &brw->draw.params, sizeof(brw->draw.params), 4,
-                      &brw->draw.draw_params_bo,
-                      &brw->draw.draw_params_offset);
-   }
-
-   if (vs_prog_data->uses_drawid || vs_prog_data->uses_is_indexed_draw) {
-      brw_upload_data(&brw->upload,
-                      &brw->draw.derived_params, sizeof(brw->draw.derived_params), 4,
-                      &brw->draw.derived_draw_params_bo,
-                      &brw->draw.derived_draw_params_offset);
-   }
-}
-
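[Annotation: brw_upload_indices() below converts the byte offset into the index buffer into 3DPRIMITIVE's start_vertex_offset, expressed in whole indices. A sketch of that conversion; the helper name is hypothetical, and index_size_shift is log2 of the index type size, as in _mesa_index_buffer.]

   #include <stdint.h>

   static uint32_t
   index_start_vertex(uint32_t byte_offset, unsigned index_size_shift)
   {
      /* shift 0: GL_UNSIGNED_BYTE, 1: GL_UNSIGNED_SHORT, 2: GL_UNSIGNED_INT */
      return byte_offset >> index_size_shift;
   }

This mirrors the offset / ib_type_size division in the function below, since ib_type_size is 1 << index_size_shift.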
-static void
-brw_upload_indices(struct brw_context *brw)
-{
-   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
-   GLuint ib_size;
-   struct brw_bo *old_bo = brw->ib.bo;
-   struct gl_buffer_object *bufferobj;
-   GLuint offset;
-   GLuint ib_type_size;
-
-   if (index_buffer == NULL)
-      return;
-
-   ib_type_size = 1 << index_buffer->index_size_shift;
-   ib_size = index_buffer->count ? ib_type_size * index_buffer->count :
-                                   index_buffer->obj->Size;
-   bufferobj = index_buffer->obj;
-
-   /* Turn into a proper VBO:
-    */
-   if (!bufferobj) {
-      /* Get new bufferobj, offset:
-       */
-      brw_upload_data(&brw->upload, index_buffer->ptr, ib_size, ib_type_size,
-                      &brw->ib.bo, &offset);
-      brw->ib.size = brw->ib.bo->size;
-   } else {
-      offset = (GLuint) (unsigned long) index_buffer->ptr;
-
-      struct brw_bo *bo =
-         brw_bufferobj_buffer(brw, brw_buffer_object(bufferobj),
-                              offset, ib_size, false);
-      if (bo != brw->ib.bo) {
-         brw_bo_unreference(brw->ib.bo);
-         brw->ib.bo = bo;
-         brw->ib.size = bufferobj->Size;
-         brw_bo_reference(bo);
-      }
-   }
-
-   /* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading
-    * the index buffer state when we're just moving the start index
-    * of our drawing.
-    */
-   brw->ib.start_vertex_offset = offset / ib_type_size;
-
-   if (brw->ib.bo != old_bo)
-      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
-
-   unsigned index_size = 1 << index_buffer->index_size_shift;
-   if (index_size != brw->ib.index_size) {
-      brw->ib.index_size = index_size;
-      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
-   }
-
-   /* We need to re-emit the index buffer state whenever the cut index
-    * flag is changed.
-    */
-   if (brw->prim_restart.enable_cut_index != brw->ib.enable_cut_index) {
-      brw->ib.enable_cut_index = brw->prim_restart.enable_cut_index;
-      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
-   }
-}
-
-const struct brw_tracked_state brw_indices = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BLORP |
-             BRW_NEW_INDICES,
-   },
-   .emit = brw_upload_indices,
-};
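[Annotation: brw_indices above is one of the driver's tracked-state atoms: a table entry pairing dirty bits with an emit callback, walked during state upload. A minimal sketch of that pattern follows; the names are hypothetical and this is not the driver's actual upload loop.]

   #include <stdint.h>

   struct dirty_bits {
      uint64_t mesa; /* _NEW_* flags from core Mesa */
      uint64_t brw;  /* BRW_NEW_* driver-internal flags */
   };

   struct state_atom {
      struct dirty_bits dirty;
      void (*emit)(void *brw);
   };

   static void
   upload_dirty_state(void *brw, const struct dirty_bits *flagged,
                      const struct state_atom *atoms, unsigned n)
   {
      for (unsigned i = 0; i < n; i++) {
         /* An atom runs when any of its dirty bits is currently flagged. */
         if ((atoms[i].dirty.mesa & flagged->mesa) ||
             (atoms[i].dirty.brw & flagged->brw))
            atoms[i].emit(brw);
      }
   }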
diff --git a/src/mesa/drivers/dri/i965/brw_extensions.c b/src/mesa/drivers/dri/i965/brw_extensions.c
deleted file mode 100644
index 7f72799..0000000
--- a/src/mesa/drivers/dri/i965/brw_extensions.c
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- * Copyright 2003 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/version.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_batch.h"
-
-/**
- * Initializes the potential list of extensions if ctx == NULL, or actually
- * enables the extensions for a context.
- */
-void
-brw_init_extensions(struct gl_context *ctx)
-{
-   struct brw_context *brw = brw_context(ctx);
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   assert(devinfo->ver >= 4);
-
-   ctx->Extensions.ARB_arrays_of_arrays = true;
-   ctx->Extensions.ARB_buffer_storage = true;
-   ctx->Extensions.ARB_clear_texture = true;
-   ctx->Extensions.ARB_clip_control = true;
-   ctx->Extensions.ARB_copy_image = true;
-   ctx->Extensions.ARB_depth_buffer_float = true;
-   ctx->Extensions.ARB_depth_clamp = true;
-   ctx->Extensions.ARB_depth_texture = true;
-   ctx->Extensions.ARB_draw_elements_base_vertex = true;
-   ctx->Extensions.ARB_draw_instanced = true;
-   ctx->Extensions.ARB_ES2_compatibility = true;
-   ctx->Extensions.ARB_explicit_attrib_location = true;
-   ctx->Extensions.ARB_explicit_uniform_location = true;
-   ctx->Extensions.ARB_fragment_coord_conventions = true;
-   ctx->Extensions.ARB_fragment_program = true;
-   ctx->Extensions.ARB_fragment_program_shadow = true;
-   ctx->Extensions.ARB_fragment_shader = true;
-   ctx->Extensions.ARB_framebuffer_object = true;
-   ctx->Extensions.ARB_half_float_vertex = true;
-   ctx->Extensions.ARB_instanced_arrays = true;
-   ctx->Extensions.ARB_internalformat_query = true;
-   ctx->Extensions.ARB_internalformat_query2 = true;
-   ctx->Extensions.ARB_map_buffer_range = true;
-   ctx->Extensions.ARB_occlusion_query = true;
-   ctx->Extensions.ARB_occlusion_query2 = true;
-   ctx->Extensions.ARB_point_sprite = true;
-   ctx->Extensions.ARB_polygon_offset_clamp = true;
-   ctx->Extensions.ARB_seamless_cube_map = true;
-   ctx->Extensions.ARB_shader_bit_encoding = true;
-   ctx->Extensions.ARB_shader_draw_parameters = true;
-   ctx->Extensions.ARB_shader_group_vote = true;
-   ctx->Extensions.ARB_shader_texture_lod = true;
-   ctx->Extensions.ARB_shading_language_packing = true;
-   ctx->Extensions.ARB_shadow = true;
-   ctx->Extensions.ARB_sync = true;
-   ctx->Extensions.ARB_texture_border_clamp = true;
-   ctx->Extensions.ARB_texture_compression_rgtc = true;
-   ctx->Extensions.ARB_texture_cube_map = true;
-   ctx->Extensions.ARB_texture_env_combine = true;
-   ctx->Extensions.ARB_texture_env_crossbar = true;
-   ctx->Extensions.ARB_texture_env_dot3 = true;
-   ctx->Extensions.ARB_texture_filter_anisotropic = true;
-   ctx->Extensions.ARB_texture_float = true;
-   ctx->Extensions.ARB_texture_mirror_clamp_to_edge = true;
-   ctx->Extensions.ARB_texture_non_power_of_two = true;
-   ctx->Extensions.ARB_texture_rg = true;
-   ctx->Extensions.ARB_texture_rgb10_a2ui = true;
-   ctx->Extensions.ARB_vertex_program = true;
-   ctx->Extensions.ARB_vertex_shader = true;
-   ctx->Extensions.ARB_vertex_type_2_10_10_10_rev = true;
-   ctx->Extensions.ARB_vertex_type_10f_11f_11f_rev = true;
-   ctx->Extensions.EXT_blend_color = true;
-   ctx->Extensions.EXT_blend_equation_separate = true;
-   ctx->Extensions.EXT_blend_func_separate = true;
-   ctx->Extensions.EXT_blend_minmax = true;
-   ctx->Extensions.EXT_color_buffer_half_float = true;
-   ctx->Extensions.EXT_draw_buffers2 = true;
-   ctx->Extensions.EXT_EGL_image_storage = true;
-   ctx->Extensions.EXT_float_blend = true;
-   ctx->Extensions.EXT_framebuffer_sRGB = true;
-   ctx->Extensions.EXT_gpu_program_parameters = true;
-   ctx->Extensions.EXT_packed_float = true;
-   ctx->Extensions.EXT_pixel_buffer_object = true;
-   ctx->Extensions.EXT_point_parameters = true;
-   ctx->Extensions.EXT_provoking_vertex = true;
-   ctx->Extensions.EXT_render_snorm = true;
-   ctx->Extensions.EXT_sRGB = true;
-   ctx->Extensions.EXT_stencil_two_side = true;
-   ctx->Extensions.EXT_texture_array = true;
-
ctx->Extensions.EXT_texture_env_dot3 = true; - ctx->Extensions.EXT_texture_filter_anisotropic = true; - ctx->Extensions.EXT_texture_integer = true; - ctx->Extensions.EXT_texture_norm16 = true; - ctx->Extensions.EXT_texture_shared_exponent = true; - ctx->Extensions.EXT_texture_snorm = true; - ctx->Extensions.EXT_texture_sRGB = true; - ctx->Extensions.EXT_texture_sRGB_decode = true; - ctx->Extensions.EXT_texture_sRGB_R8 = true; - ctx->Extensions.EXT_texture_swizzle = true; - ctx->Extensions.EXT_texture_type_2_10_10_10_REV = true; - ctx->Extensions.EXT_vertex_array_bgra = true; - ctx->Extensions.KHR_robustness = true; - ctx->Extensions.AMD_seamless_cubemap_per_texture = true; - ctx->Extensions.APPLE_object_purgeable = true; - ctx->Extensions.ATI_texture_env_combine3 = true; - ctx->Extensions.MESA_framebuffer_flip_y = true; - ctx->Extensions.NV_conditional_render = true; - ctx->Extensions.NV_fog_distance = true; - ctx->Extensions.NV_primitive_restart = true; - ctx->Extensions.NV_texture_barrier = true; - ctx->Extensions.NV_texture_env_combine4 = true; - ctx->Extensions.NV_texture_rectangle = true; - ctx->Extensions.TDFX_texture_compression_FXT1 = true; - ctx->Extensions.OES_compressed_ETC1_RGB8_texture = true; - ctx->Extensions.OES_draw_texture = true; - ctx->Extensions.OES_EGL_image = true; - ctx->Extensions.OES_EGL_image_external = true; - ctx->Extensions.OES_standard_derivatives = true; - ctx->Extensions.OES_texture_float = true; - ctx->Extensions.OES_texture_float_linear = true; - ctx->Extensions.OES_texture_half_float = true; - ctx->Extensions.OES_texture_half_float_linear = true; - - if (devinfo->ver >= 8) - ctx->Const.GLSLVersion = 460; - else if (devinfo->platform == INTEL_PLATFORM_HSW && - can_do_pipelined_register_writes(brw->screen)) - ctx->Const.GLSLVersion = 450; - else if (devinfo->ver >= 7 && can_do_pipelined_register_writes(brw->screen)) - ctx->Const.GLSLVersion = 420; - else if (devinfo->ver >= 6) - ctx->Const.GLSLVersion = 330; - else - ctx->Const.GLSLVersion = 120; - - if (devinfo->ver >= 6) - ctx->Const.GLSLVersionCompat = 130; - else - ctx->Const.GLSLVersionCompat = 120; - - _mesa_override_glsl_version(&ctx->Const); - - ctx->Extensions.EXT_shader_integer_mix = ctx->Const.GLSLVersion >= 130; - ctx->Extensions.MESA_shader_integer_functions = ctx->Const.GLSLVersion >= 130; - - if (devinfo->verx10 >= 45) { - ctx->Extensions.EXT_shader_framebuffer_fetch_non_coherent = true; - ctx->Extensions.KHR_blend_equation_advanced = true; - } - - if (devinfo->ver >= 5) { - ctx->Extensions.ARB_texture_query_levels = ctx->Const.GLSLVersion >= 130; - ctx->Extensions.ARB_texture_query_lod = true; - ctx->Extensions.EXT_timer_query = true; - } - - if (devinfo->ver == 6) - ctx->Extensions.ARB_transform_feedback2 = true; - - if (devinfo->ver >= 6) { - ctx->Extensions.ARB_blend_func_extended = - !driQueryOptionb(&brw->screen->optionCache, "disable_blend_func_extended"); - ctx->Extensions.ARB_conditional_render_inverted = true; - ctx->Extensions.ARB_cull_distance = true; - ctx->Extensions.ARB_draw_buffers_blend = true; - if (ctx->API != API_OPENGL_COMPAT || - ctx->Const.AllowHigherCompatVersion) - ctx->Extensions.ARB_enhanced_layouts = true; - ctx->Extensions.ARB_ES3_compatibility = true; - ctx->Extensions.ARB_fragment_layer_viewport = true; - ctx->Extensions.ARB_pipeline_statistics_query = true; - ctx->Extensions.ARB_sample_shading = true; - ctx->Extensions.ARB_shading_language_420pack = true; - if (ctx->API != API_OPENGL_COMPAT || - ctx->Const.AllowHigherCompatVersion) { - 
ctx->Extensions.ARB_texture_buffer_object = true; - ctx->Extensions.ARB_texture_buffer_object_rgb32 = true; - ctx->Extensions.ARB_texture_buffer_range = true; - } - ctx->Extensions.ARB_texture_cube_map_array = true; - ctx->Extensions.ARB_texture_gather = true; - ctx->Extensions.ARB_texture_multisample = true; - ctx->Extensions.ARB_uniform_buffer_object = true; - ctx->Extensions.EXT_gpu_shader4 = true; - ctx->Extensions.EXT_texture_shadow_lod = true; - - if (ctx->API != API_OPENGL_COMPAT || - ctx->Const.AllowHigherCompatVersion) - ctx->Extensions.AMD_vertex_shader_layer = true; - ctx->Extensions.EXT_framebuffer_multisample = true; - ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true; - ctx->Extensions.EXT_transform_feedback = true; - ctx->Extensions.ARB_transform_feedback_overflow_query = true; - ctx->Extensions.OES_depth_texture_cube_map = true; - ctx->Extensions.OES_sample_variables = true; - - ctx->Extensions.ARB_timer_query = brw->screen->hw_has_timestamp; - ctx->Extensions.EXT_disjoint_timer_query = - ctx->Extensions.ARB_timer_query; - - /* Only enable this in core profile because geometry shaders are - * required, and Mesa only supports geometry shaders in OpenGL 3.2 and - * later. In this driver, that currently means Core profile. - */ - if (ctx->API == API_OPENGL_CORE || - ctx->Const.AllowHigherCompatVersion) { - ctx->Extensions.ARB_shader_viewport_layer_array = true; - ctx->Extensions.ARB_viewport_array = true; - ctx->Extensions.AMD_vertex_shader_viewport_index = true; - } - } - - brw->predicate.supported = false; - - if (devinfo->ver >= 7) { - ctx->Extensions.ARB_conservative_depth = true; - ctx->Extensions.ARB_derivative_control = true; - ctx->Extensions.ARB_framebuffer_no_attachments = true; - if (ctx->API != API_OPENGL_COMPAT || - ctx->Const.AllowHigherCompatVersion) { - ctx->Extensions.ARB_gpu_shader5 = true; - ctx->Extensions.ARB_gpu_shader_fp64 = true; - } - ctx->Extensions.ARB_shader_atomic_counters = true; - ctx->Extensions.ARB_shader_atomic_counter_ops = true; - ctx->Extensions.ARB_shader_clock = true; - ctx->Extensions.ARB_shader_image_load_store = true; - ctx->Extensions.ARB_shader_image_size = true; - ctx->Extensions.ARB_shader_precision = true; - ctx->Extensions.ARB_shader_texture_image_samples = true; - if (ctx->API != API_OPENGL_COMPAT || - ctx->Const.AllowHigherCompatVersion) - ctx->Extensions.ARB_tessellation_shader = true; - ctx->Extensions.ARB_texture_compression_bptc = true; - ctx->Extensions.ARB_texture_view = true; - ctx->Extensions.ARB_shader_storage_buffer_object = true; - ctx->Extensions.ARB_vertex_attrib_64bit = true; - ctx->Extensions.EXT_shader_samples_identical = true; - ctx->Extensions.OES_primitive_bounding_box = true; - ctx->Extensions.OES_texture_buffer = true; - - if (can_do_pipelined_register_writes(brw->screen)) { - ctx->Extensions.ARB_draw_indirect = true; - ctx->Extensions.ARB_transform_feedback2 = true; - ctx->Extensions.ARB_transform_feedback3 = true; - ctx->Extensions.ARB_transform_feedback_instanced = true; - - if (can_do_compute_dispatch(brw->screen) && - ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) { - ctx->Extensions.ARB_compute_shader = true; - ctx->Extensions.ARB_ES3_1_compatibility = - devinfo->verx10 >= 75; - ctx->Extensions.NV_compute_shader_derivatives = true; - ctx->Extensions.ARB_compute_variable_group_size = true; - } - - if (can_do_predicate_writes(brw->screen)) { - brw->predicate.supported = true; - ctx->Extensions.ARB_indirect_parameters = true; - } - } - - ctx->Extensions.ARB_gl_spirv = true; - 
ctx->Extensions.ARB_spirv_extensions = true; - } - - if (devinfo->verx10 >= 75) { - ctx->Extensions.ARB_stencil_texturing = true; - ctx->Extensions.ARB_texture_stencil8 = true; - ctx->Extensions.OES_geometry_shader = true; - ctx->Extensions.OES_texture_cube_map_array = true; - ctx->Extensions.OES_viewport_array = true; - } - - if (devinfo->verx10 >= 75 || devinfo->platform == INTEL_PLATFORM_BYT) { - ctx->Extensions.ARB_robust_buffer_access_behavior = true; - } - - if (can_do_mi_math_and_lrr(brw->screen)) { - ctx->Extensions.ARB_query_buffer_object = true; - } - - if (devinfo->ver >= 8 || devinfo->platform == INTEL_PLATFORM_BYT) { - /* For now, we can't enable OES_texture_view on Gen 7 because of - * some piglit failures coming from - * piglit/tests/spec/arb_texture_view/rendering-formats.c that need - * investigation. - */ - ctx->Extensions.OES_texture_view = true; - } - - if (devinfo->ver >= 7) { - /* We can safely enable OES_copy_image on Gen 7, since we emulate - * the ETC2 support using the shadow_miptree to store the - * compressed data. - */ - ctx->Extensions.OES_copy_image = true; - } - - /* Gen < 6 still uses the blitter. It's somewhat annoying to add support - * for blackhole there... Does anybody actually care anymore anyway? - */ - if (devinfo->ver >= 6) - ctx->Extensions.INTEL_blackhole_render = true; - - if (devinfo->ver >= 8) { - ctx->Extensions.ARB_gpu_shader_int64 = true; - /* requires ARB_gpu_shader_int64 */ - ctx->Extensions.ARB_shader_ballot = true; - ctx->Extensions.ARB_ES3_2_compatibility = true; - - /* Currently only implemented in the scalar backend, so only enable for - * Gfx8+. Eventually Gfx6+ could be supported. - */ - ctx->Extensions.INTEL_shader_integer_functions2 = true; - } - - if (devinfo->ver >= 9) { - ctx->Extensions.ANDROID_extension_pack_es31a = true; - ctx->Extensions.AMD_depth_clamp_separate = true; - ctx->Extensions.ARB_post_depth_coverage = true; - ctx->Extensions.ARB_shader_stencil_export = true; - ctx->Extensions.EXT_shader_framebuffer_fetch = true; - ctx->Extensions.INTEL_conservative_rasterization = true; - ctx->Extensions.INTEL_shader_atomic_float_minmax = true; - ctx->Extensions.KHR_blend_equation_advanced_coherent = true; - ctx->Extensions.KHR_texture_compression_astc_ldr = true; - ctx->Extensions.KHR_texture_compression_astc_sliced_3d = true; - - /* - * From the Skylake PRM Vol. 7 (Memory Fence Message, page 221): - * "A memory fence message issued by a thread causes further messages - * issued by the thread to be blocked until all previous data port - * messages have completed, or the results can be globally observed from - * the point of view of other threads in the system." - * - * From the Haswell PRM Vol. 7 (Memory Fence, page 256): - * "A memory fence message issued by a thread causes further messages - * issued by the thread to be blocked until all previous messages issued - * by the thread to that data port (data cache or render cache) have - * been globally observed from the point of view of other threads in the - * system." - * - * Summarized: For ARB_fragment_shader_interlock to work, we need to - * ensure memory access ordering for all messages to the dataport from - * all threads. Memory fence messages prior to SKL only provide memory - * access ordering for messages from the same thread, so we can only - * support the feature from Gfx9 onwards. 
- * - */ - - ctx->Extensions.ARB_fragment_shader_interlock = true; - } - - if (intel_device_info_is_9lp(devinfo)) - ctx->Extensions.KHR_texture_compression_astc_hdr = true; - - if (devinfo->ver >= 6) - ctx->Extensions.INTEL_performance_query = true; - - if (ctx->API != API_OPENGL_COMPAT || - ctx->Const.AllowHigherCompatVersion) - ctx->Extensions.ARB_base_instance = true; - if (ctx->API != API_OPENGL_CORE) - ctx->Extensions.ARB_color_buffer_float = true; - - ctx->Extensions.EXT_texture_compression_s3tc = true; - ctx->Extensions.EXT_texture_compression_s3tc_srgb = true; - ctx->Extensions.ANGLE_texture_compression_dxt = true; - - ctx->Extensions.EXT_demote_to_helper_invocation = true; - - ctx->Const.PrimitiveRestartFixedIndex = true; - - if (devinfo->ver >= 7) { - ctx->Extensions.EXT_memory_object_fd = true; - ctx->Extensions.EXT_memory_object = true; - ctx->Extensions.EXT_semaphore = true; - ctx->Extensions.EXT_semaphore_fd = true; - } -} diff --git a/src/mesa/drivers/dri/i965/brw_fbo.c b/src/mesa/drivers/dri/i965/brw_fbo.c deleted file mode 100644 index ff30385..0000000 --- a/src/mesa/drivers/dri/i965/brw_fbo.c +++ /dev/null @@ -1,1139 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "main/enums.h" -#include "main/macros.h" -#include "main/mtypes.h" -#include "main/fbobject.h" -#include "main/framebuffer.h" -#include "main/renderbuffer.h" -#include "main/context.h" -#include "main/teximage.h" -#include "main/image.h" -#include "main/condrender.h" -#include "util/hash_table.h" -#include "util/set.h" -#include "util/u_memory.h" - -#include "swrast/swrast.h" -#include "drivers/common/meta.h" - -#include "brw_batch.h" -#include "brw_buffers.h" -#include "brw_blit.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" -#include "brw_image.h" -#include "brw_screen.h" -#include "brw_tex.h" -#include "brw_context.h" -#include "brw_defines.h" - -#define FILE_DEBUG_FLAG DEBUG_FBO - -/** Called by gl_renderbuffer::Delete() */ -static void -brw_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb) -{ - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - - assert(irb); - - brw_miptree_release(&irb->mt); - brw_miptree_release(&irb->singlesample_mt); - - _mesa_delete_renderbuffer(ctx, rb); -} - -/** - * \brief Downsample a winsys renderbuffer from mt to singlesample_mt. - * - * If the miptree needs no downsample, then skip. 
- */
-void
-brw_renderbuffer_downsample(struct brw_context *brw,
-                            struct brw_renderbuffer *irb)
-{
-   if (!irb->need_downsample)
-      return;
-   brw_miptree_updownsample(brw, irb->mt, irb->singlesample_mt);
-   irb->need_downsample = false;
-}
-
-/**
- * \brief Upsample a winsys renderbuffer from singlesample_mt to mt.
- *
- * The upsample is done unconditionally.
- */
-void
-brw_renderbuffer_upsample(struct brw_context *brw,
-                          struct brw_renderbuffer *irb)
-{
-   assert(!irb->need_downsample);
-
-   brw_miptree_updownsample(brw, irb->singlesample_mt, irb->mt);
-}
-
-/**
- * \see dd_function_table::MapRenderbuffer
- */
-static void
-brw_map_renderbuffer(struct gl_context *ctx,
-                     struct gl_renderbuffer *rb,
-                     GLuint x, GLuint y, GLuint w, GLuint h,
-                     GLbitfield mode,
-                     GLubyte **out_map,
-                     GLint *out_stride,
-                     bool flip_y)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb;
-   struct brw_renderbuffer *irb = brw_renderbuffer(rb);
-   struct brw_mipmap_tree *mt;
-   void *map;
-   ptrdiff_t stride;
-
-   if (srb->Buffer) {
-      /* this is a malloc'd renderbuffer (accum buffer), not an irb */
-      GLint bpp = _mesa_get_format_bytes(rb->Format);
-      GLint rowStride = srb->RowStride;
-      *out_map = (GLubyte *) srb->Buffer + y * rowStride + x * bpp;
-      *out_stride = rowStride;
-      return;
-   }
-
-   brw_prepare_render(brw);
-
-   /* The MapRenderbuffer API should always return a single-sampled mapping.
-    * The case where we are asked to map multisampled RBs is glReadPixels()
-    * (or swrast paths like glCopyTexImage()) from a window-system MSAA
-    * buffer, and GL expects an automatic resolve to happen.
-    *
-    * If it's a color miptree, there is a ->singlesample_mt which wraps the
-    * actual window system renderbuffer (which we may resolve to at any time),
-    * while the miptree itself is our driver-private allocation. If it's a
-    * depth or stencil miptree, we have a private MSAA buffer and no shared
-    * singlesample buffer, and since we don't expect anybody to ever actually
-    * resolve it, we just make a temporary singlesample buffer now when we
-    * have to.
-    */
-   if (rb->NumSamples > 1) {
-      if (!irb->singlesample_mt) {
-         irb->singlesample_mt =
-            brw_miptree_create_for_renderbuffer(brw, irb->mt->format,
-                                                rb->Width, rb->Height,
-                                                1 /*num_samples*/);
-         if (!irb->singlesample_mt)
-            goto fail;
-         irb->singlesample_mt_is_tmp = true;
-         irb->need_downsample = true;
-      }
-
-      brw_renderbuffer_downsample(brw, irb);
-      mt = irb->singlesample_mt;
-
-      irb->need_map_upsample = mode & GL_MAP_WRITE_BIT;
-   } else {
-      mt = irb->mt;
-   }
-
-   /* For a window-system renderbuffer, we need to flip the mapping we receive
-    * upside-down. So we need to ask for a rectangle flipped vertically, and
-    * we then return a pointer to the bottom of it with a negative stride.
- */ - if (flip_y) { - y = rb->Height - y - h; - } - - brw_miptree_map(brw, mt, irb->mt_level, irb->mt_layer, - x, y, w, h, mode, &map, &stride); - - if (flip_y) { - map += (h - 1) * stride; - stride = -stride; - } - - DBG("%s: rb %d (%s) mt mapped: (%d, %d) (%dx%d) -> %p/%"PRIdPTR"\n", - __func__, rb->Name, _mesa_get_format_name(rb->Format), - x, y, w, h, map, stride); - - *out_map = map; - *out_stride = stride; - return; - -fail: - *out_map = NULL; - *out_stride = 0; -} - -/** - * \see dd_function_table::UnmapRenderbuffer - */ -static void -brw_unmap_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb) -{ - struct brw_context *brw = brw_context(ctx); - struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb; - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - struct brw_mipmap_tree *mt; - - DBG("%s: rb %d (%s)\n", __func__, - rb->Name, _mesa_get_format_name(rb->Format)); - - if (srb->Buffer) { - /* this is a malloc'd renderbuffer (accum buffer) */ - /* nothing to do */ - return; - } - - if (rb->NumSamples > 1) { - mt = irb->singlesample_mt; - } else { - mt = irb->mt; - } - - brw_miptree_unmap(brw, mt, irb->mt_level, irb->mt_layer); - - if (irb->need_map_upsample) { - brw_renderbuffer_upsample(brw, irb); - irb->need_map_upsample = false; - } - - if (irb->singlesample_mt_is_tmp) - brw_miptree_release(&irb->singlesample_mt); -} - - -/** - * Round up the requested multisample count to the next supported sample size. - */ -unsigned -brw_quantize_num_samples(struct brw_screen *intel, unsigned num_samples) -{ - const int *msaa_modes = brw_supported_msaa_modes(intel); - int quantized_samples = 0; - - for (int i = 0; msaa_modes[i] != -1; ++i) { - if (msaa_modes[i] >= num_samples) - quantized_samples = msaa_modes[i]; - else - break; - } - - return quantized_samples; -} - -static mesa_format -brw_renderbuffer_format(struct gl_context * ctx, GLenum internalFormat) -{ - struct brw_context *brw = brw_context(ctx); - ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo; - - switch (internalFormat) { - default: - /* Use the same format-choice logic as for textures. - * Renderbuffers aren't any different from textures for us, - * except they're less useful because you can't texture with - * them. - */ - return ctx->Driver.ChooseTextureFormat(ctx, GL_TEXTURE_2D, - internalFormat, - GL_NONE, GL_NONE); - break; - case GL_STENCIL_INDEX: - case GL_STENCIL_INDEX1_EXT: - case GL_STENCIL_INDEX4_EXT: - case GL_STENCIL_INDEX8_EXT: - case GL_STENCIL_INDEX16_EXT: - /* These aren't actual texture formats, so force them here. 
*/
-      if (brw->has_separate_stencil) {
-         return MESA_FORMAT_S_UINT8;
-      } else {
-         assert(!devinfo->must_use_separate_stencil);
-         return MESA_FORMAT_Z24_UNORM_S8_UINT;
-      }
-   }
-}
-
-static GLboolean
-brw_alloc_private_renderbuffer_storage(struct gl_context *ctx,
-                                       struct gl_renderbuffer *rb,
-                                       GLenum internalFormat,
-                                       GLuint width, GLuint height)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_screen *screen = brw->screen;
-   struct brw_renderbuffer *irb = brw_renderbuffer(rb);
-
-   assert(rb->Format != MESA_FORMAT_NONE);
-
-   rb->NumSamples = brw_quantize_num_samples(screen, rb->NumSamples);
-   rb->NumStorageSamples = rb->NumSamples;
-   rb->Width = width;
-   rb->Height = height;
-   rb->_BaseFormat = _mesa_get_format_base_format(rb->Format);
-
-   brw_miptree_release(&irb->mt);
-
-   DBG("%s: %s: %s (%dx%d)\n", __func__,
-       _mesa_enum_to_string(internalFormat),
-       _mesa_get_format_name(rb->Format), width, height);
-
-   if (width == 0 || height == 0)
-      return true;
-
-   irb->mt = brw_miptree_create_for_renderbuffer(brw, rb->Format,
-                                                 width, height,
-                                                 MAX2(rb->NumSamples, 1));
-   if (!irb->mt)
-      return false;
-
-   irb->layer_count = 1;
-
-   return true;
-}
-
-/**
- * Called via glRenderbufferStorageEXT() to set the format and allocate
- * storage for a user-created renderbuffer.
- */
-static GLboolean
-brw_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
-                               GLenum internalFormat,
-                               GLuint width, GLuint height)
-{
-   rb->Format = brw_renderbuffer_format(ctx, internalFormat);
-   return brw_alloc_private_renderbuffer_storage(ctx, rb, internalFormat, width, height);
-}
-
-static mesa_format
-fallback_rgbx_to_rgba(struct brw_screen *screen, struct gl_renderbuffer *rb,
-                      mesa_format original_format)
-{
-   mesa_format format = original_format;
-
-   /* The base format and internal format must be derived from the
-    * user-visible format (that is, the gl_config's format), even if we
-    * internally choose a different format for the renderbuffer. Otherwise,
-    * rendering may use incorrect channel write masks.
-    */
-   rb->_BaseFormat = _mesa_get_format_base_format(original_format);
-   rb->InternalFormat = rb->_BaseFormat;
-
-   if (!screen->mesa_format_supports_render[original_format]) {
-      /* The glRenderbufferStorage paths in core Mesa detect if the driver
-       * does not support the user-requested format, and then search for
-       * a fallback format. The DRI code bypasses core Mesa, though. So we
-       * do the fallbacks here.
-       *
-       * We must support MESA_FORMAT_R8G8B8X8 on Android because the Android
-       * framework requires HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces.
-       */
-      format = _mesa_format_fallback_rgbx_to_rgba(original_format);
-      assert(screen->mesa_format_supports_render[format]);
-   }
-   return format;
-}
-
-static void
-brw_image_target_renderbuffer_storage(struct gl_context *ctx,
-                                      struct gl_renderbuffer *rb,
-                                      void *image_handle)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_renderbuffer *irb;
-   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
-   __DRIimage *image;
-
-   image = dri_screen->dri2.image->lookupEGLImage(dri_screen, image_handle,
-                                                  dri_screen->loaderPrivate);
-   if (image == NULL)
-      return;
-
-   if (image->planar_format && image->planar_format->nplanes > 1) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glEGLImageTargetRenderbufferStorage(planar buffers are not "
-                  "supported as render targets.)");
-      return;
-   }
-
-   rb->Format = fallback_rgbx_to_rgba(brw->screen, rb, image->format);
-
-   mesa_format chosen_format = rb->Format == image->format ?
- image->format : rb->Format; - - /* __DRIimage is opaque to the core so it has to be checked here */ - if (!brw->mesa_format_supports_render[chosen_format]) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glEGLImageTargetRenderbufferStorage(unsupported image format)"); - return; - } - - irb = brw_renderbuffer(rb); - brw_miptree_release(&irb->mt); - - /* Disable creation of the miptree's aux buffers because the driver exposes - * no EGL API to manage them. That is, there is no API for resolving the aux - * buffer's content to the main buffer nor for invalidating the aux buffer's - * content. - */ - irb->mt = brw_miptree_create_for_dri_image(brw, image, GL_TEXTURE_2D, - rb->Format, false); - if (!irb->mt) - return; - - rb->Width = image->width; - rb->Height = image->height; - rb->NeedsFinishRenderTexture = true; - irb->layer_count = 1; -} - -/** - * Called by _mesa_resize_framebuffer() for each hardware renderbuffer when a - * window system framebuffer is resized. - * - * Any actual buffer reallocations for hardware renderbuffers (which would - * have triggered _mesa_resize_framebuffer()) were done by - * brw_process_dri2_buffer(). - */ -static GLboolean -brw_alloc_window_storage(struct gl_context *ctx, struct gl_renderbuffer *rb, - GLenum internalFormat, GLuint width, GLuint height) -{ - (void) ctx; - assert(rb->Name == 0); - rb->Width = width; - rb->Height = height; - rb->InternalFormat = internalFormat; - - return true; -} - -/** Dummy function for gl_renderbuffer::AllocStorage() */ -static GLboolean -brw_nop_alloc_storage(struct gl_context *ctx, struct gl_renderbuffer *rb, - GLenum internalFormat, GLuint width, GLuint height) -{ - (void) rb; - (void) internalFormat; - (void) width; - (void) height; - _mesa_problem(ctx, "brw_nop_alloc_storage should never be called."); - return false; -} - -/** - * Create an brw_renderbuffer for a __DRIdrawable. This function is - * unrelated to GL renderbuffers (that is, those created by - * glGenRenderbuffers). - * - * \param num_samples must be quantized. - */ -struct brw_renderbuffer * -brw_create_winsys_renderbuffer(struct brw_screen *screen, - mesa_format format, unsigned num_samples) -{ - struct brw_renderbuffer *irb = CALLOC_STRUCT(brw_renderbuffer); - if (!irb) - return NULL; - - struct gl_renderbuffer *rb = &irb->Base.Base; - irb->layer_count = 1; - - _mesa_init_renderbuffer(rb, 0); - rb->ClassID = INTEL_RB_CLASS; - rb->NumSamples = num_samples; - rb->NumStorageSamples = num_samples; - - rb->Format = fallback_rgbx_to_rgba(screen, rb, format); - - /* intel-specific methods */ - rb->Delete = brw_delete_renderbuffer; - rb->AllocStorage = brw_alloc_window_storage; - - return irb; -} - -/** - * Private window-system buffers (as opposed to ones shared with the display - * server created with brw_create_winsys_renderbuffer()) are most similar in their - * handling to user-created renderbuffers, but they have a resize handler that - * may be called at brw_update_renderbuffers() time. - * - * \param num_samples must be quantized. - */ -struct brw_renderbuffer * -brw_create_private_renderbuffer(struct brw_screen *screen, - mesa_format format, unsigned num_samples) -{ - struct brw_renderbuffer *irb; - - irb = brw_create_winsys_renderbuffer(screen, format, num_samples); - irb->Base.Base.AllocStorage = brw_alloc_private_renderbuffer_storage; - - return irb; -} - -/** - * Create a new renderbuffer object. - * Typically called via glBindRenderbufferEXT(). 
- */ -static struct gl_renderbuffer * -brw_new_renderbuffer(struct gl_context *ctx, GLuint name) -{ - struct brw_renderbuffer *irb; - struct gl_renderbuffer *rb; - - irb = CALLOC_STRUCT(brw_renderbuffer); - if (!irb) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer"); - return NULL; - } - - rb = &irb->Base.Base; - - _mesa_init_renderbuffer(rb, name); - rb->ClassID = INTEL_RB_CLASS; - - /* intel-specific methods */ - rb->Delete = brw_delete_renderbuffer; - rb->AllocStorage = brw_alloc_renderbuffer_storage; - /* span routines set in alloc_storage function */ - - return rb; -} - -static bool -brw_renderbuffer_update_wrapper(struct brw_context *brw, - struct brw_renderbuffer *irb, - struct gl_texture_image *image, - uint32_t layer, - bool layered) -{ - struct gl_renderbuffer *rb = &irb->Base.Base; - struct brw_texture_image *intel_image = brw_texture_image(image); - struct brw_mipmap_tree *mt = intel_image->mt; - int level = image->Level; - - rb->AllocStorage = brw_nop_alloc_storage; - - /* adjust for texture view parameters */ - layer += image->TexObject->Attrib.MinLayer; - level += image->TexObject->Attrib.MinLevel; - - brw_miptree_check_level_layer(mt, level, layer); - irb->mt_level = level; - irb->mt_layer = layer; - - if (!layered) { - irb->layer_count = 1; - } else if (mt->target != GL_TEXTURE_3D && image->TexObject->Attrib.NumLayers > 0) { - irb->layer_count = image->TexObject->Attrib.NumLayers; - } else { - irb->layer_count = mt->surf.dim == ISL_SURF_DIM_3D ? - minify(mt->surf.logical_level0_px.depth, level) : - mt->surf.logical_level0_px.array_len; - } - - brw_miptree_reference(&irb->mt, mt); - - brw_renderbuffer_set_draw_offset(irb); - - return true; -} - -void -brw_renderbuffer_set_draw_offset(struct brw_renderbuffer *irb) -{ - unsigned int dst_x, dst_y; - - /* compute offset of the particular 2D image within the texture region */ - brw_miptree_get_image_offset(irb->mt, irb->mt_level, irb->mt_layer, - &dst_x, &dst_y); - - irb->draw_x = dst_x; - irb->draw_y = dst_y; -} - -/** - * Called by glFramebufferTexture[123]DEXT() (and other places) to - * prepare for rendering into texture memory. This might be called - * many times to choose different texture levels, cube faces, etc - * before brw_finish_render_texture() is ever called. - */ -static void -brw_render_texture(struct gl_context * ctx, - struct gl_framebuffer *fb, - struct gl_renderbuffer_attachment *att) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_renderbuffer *rb = att->Renderbuffer; - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - struct gl_texture_image *image = rb->TexImage; - struct brw_texture_image *intel_image = brw_texture_image(image); - struct brw_mipmap_tree *mt = intel_image->mt; - int layer; - - (void) fb; - - if (att->CubeMapFace > 0) { - assert(att->Zoffset == 0); - layer = att->CubeMapFace; - } else { - layer = att->Zoffset; - } - - if (!intel_image->mt) { - /* Fallback on drawing to a texture that doesn't have a miptree - * (has a border, width/height 0, etc.) - */ - _swrast_render_texture(ctx, fb, att); - return; - } - - brw_miptree_check_level_layer(mt, att->TextureLevel, layer); - - if (!brw_renderbuffer_update_wrapper(brw, irb, image, layer, att->Layered)) { - _swrast_render_texture(ctx, fb, att); - return; - } - - DBG("Begin render %s texture tex=%u w=%d h=%d d=%d refcount=%d\n", - _mesa_get_format_name(image->TexFormat), - att->Texture->Name, image->Width, image->Height, image->Depth, - rb->RefCount); -} - - -#define fbo_incomplete(fb, error_id, ...) 
do { \
-      static GLuint msg_id = 0; \
-      if (unlikely(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) { \
-         _mesa_gl_debugf(ctx, &msg_id, \
-                         MESA_DEBUG_SOURCE_API, \
-                         MESA_DEBUG_TYPE_OTHER, \
-                         MESA_DEBUG_SEVERITY_MEDIUM, \
-                         __VA_ARGS__); \
-      } \
-      DBG(__VA_ARGS__); \
-      fb->_Status = error_id; \
-   } while (0)
-
-/**
- * Do additional "completeness" testing of a framebuffer object.
- */
-static void
-brw_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
-{
-   struct brw_context *brw = brw_context(ctx);
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-   struct brw_renderbuffer *depthRb =
-      brw_get_renderbuffer(fb, BUFFER_DEPTH);
-   struct brw_renderbuffer *stencilRb =
-      brw_get_renderbuffer(fb, BUFFER_STENCIL);
-   struct brw_mipmap_tree *depth_mt = NULL, *stencil_mt = NULL;
-   unsigned i;
-
-   DBG("%s() on fb %p (%s)\n", __func__,
-       fb, (fb == ctx->DrawBuffer ? "drawbuffer" :
-            (fb == ctx->ReadBuffer ? "readbuffer" : "other buffer")));
-
-   if (depthRb)
-      depth_mt = depthRb->mt;
-   if (stencilRb) {
-      stencil_mt = stencilRb->mt;
-      if (stencil_mt->stencil_mt)
-         stencil_mt = stencil_mt->stencil_mt;
-   }
-
-   if (depth_mt && stencil_mt) {
-      if (devinfo->ver >= 6) {
-         const unsigned d_width = depth_mt->surf.phys_level0_sa.width;
-         const unsigned d_height = depth_mt->surf.phys_level0_sa.height;
-         const unsigned d_depth = depth_mt->surf.dim == ISL_SURF_DIM_3D ?
-            depth_mt->surf.phys_level0_sa.depth :
-            depth_mt->surf.phys_level0_sa.array_len;
-
-         const unsigned s_width = stencil_mt->surf.phys_level0_sa.width;
-         const unsigned s_height = stencil_mt->surf.phys_level0_sa.height;
-         const unsigned s_depth = stencil_mt->surf.dim == ISL_SURF_DIM_3D ?
-            stencil_mt->surf.phys_level0_sa.depth :
-            stencil_mt->surf.phys_level0_sa.array_len;
-
-         /* For gen >= 6, we are using the lod/minimum-array-element fields
-          * and supporting layered rendering. This means that we must restrict
-          * the depth & stencil attachments to match in various more
-          * restrictive ways. (width, height, depth, LOD and layer)
-          */
-         if (d_width != s_width ||
-             d_height != s_height ||
-             d_depth != s_depth ||
-             depthRb->mt_level != stencilRb->mt_level ||
-             depthRb->mt_layer != stencilRb->mt_layer) {
-            fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
-                           "FBO incomplete: depth and stencil must match in "
-                           "width, height, depth, LOD and layer\n");
-         }
-      }
-      if (depth_mt == stencil_mt) {
-         /* For true packed depth/stencil (not faked on prefers-separate-stencil
-          * hardware) we need to be sure they're the same level/layer, since
-          * we'll be emitting a single packet describing the packed setup.
-          */
-         if (depthRb->mt_level != stencilRb->mt_level ||
-             depthRb->mt_layer != stencilRb->mt_layer) {
-            fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
-                           "FBO incomplete: depth image level/layer %d/%d != "
-                           "stencil image %d/%d\n",
-                           depthRb->mt_level,
-                           depthRb->mt_layer,
-                           stencilRb->mt_level,
-                           stencilRb->mt_layer);
-         }
-      } else {
-         if (!brw->has_separate_stencil) {
-            fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
-                           "FBO incomplete: separate stencil unsupported\n");
-         }
-         if (stencil_mt->format != MESA_FORMAT_S_UINT8) {
-            fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
-                           "FBO incomplete: separate stencil is %s "
-                           "instead of S8\n",
-                           _mesa_get_format_name(stencil_mt->format));
-         }
-         if (devinfo->ver < 7 && !brw_renderbuffer_has_hiz(depthRb)) {
-            /* Before Gfx7, separate depth and stencil buffers can be used
-             * only if HiZ is enabled.
From the Sandybridge PRM, Volume 2, - * Part 1, Bit 3DSTATE_DEPTH_BUFFER.SeparateStencilBufferEnable: - * [DevSNB]: This field must be set to the same value (enabled - * or disabled) as Hierarchical Depth Buffer Enable. - */ - fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, - "FBO incomplete: separate stencil without HiZ\n"); - } - } - } - - for (i = 0; i < ARRAY_SIZE(fb->Attachment); i++) { - struct gl_renderbuffer *rb; - struct brw_renderbuffer *irb; - - if (fb->Attachment[i].Type == GL_NONE) - continue; - - /* A supported attachment will have a Renderbuffer set either - * from being a Renderbuffer or being a texture that got the - * brw_wrap_texture() treatment. - */ - rb = fb->Attachment[i].Renderbuffer; - if (rb == NULL) { - fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, - "FBO incomplete: attachment without " - "renderbuffer\n"); - continue; - } - - if (fb->Attachment[i].Type == GL_TEXTURE) { - if (rb->TexImage->Border) { - fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, - "FBO incomplete: texture with border\n"); - continue; - } - } - - irb = brw_renderbuffer(rb); - if (irb == NULL) { - fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, - "FBO incomplete: software rendering renderbuffer\n"); - continue; - } - - if (rb->Format == MESA_FORMAT_R_SRGB8) { - fbo_incomplete(fb, GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT, - "FBO incomplete: Format not color renderable: %s\n", - _mesa_get_format_name(rb->Format)); - continue; - } - - if (!brw_render_target_supported(brw, rb)) { - fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, - "FBO incomplete: Unsupported HW " - "texture/renderbuffer format attached: %s\n", - _mesa_get_format_name(brw_rb_format(irb))); - } - } -} - -/** - * Try to do a glBlitFramebuffer using glCopyTexSubImage2D - * We can do this when the dst renderbuffer is actually a texture and - * there is no scaling, mirroring or scissoring. - * - * \return new buffer mask indicating the buffers left to blit using the - * normal path. - */ -static GLbitfield -brw_blit_framebuffer_with_blitter(struct gl_context *ctx, - const struct gl_framebuffer *readFb, - const struct gl_framebuffer *drawFb, - GLint srcX0, GLint srcY0, - GLint srcX1, GLint srcY1, - GLint dstX0, GLint dstY0, - GLint dstX1, GLint dstY1, - GLbitfield mask) -{ - struct brw_context *brw = brw_context(ctx); - - /* Sync up the state of window system buffers. We need to do this before - * we go looking for the buffers. - */ - brw_prepare_render(brw); - - if (mask & GL_COLOR_BUFFER_BIT) { - unsigned i; - struct gl_renderbuffer *src_rb = readFb->_ColorReadBuffer; - struct brw_renderbuffer *src_irb = brw_renderbuffer(src_rb); - - if (!src_irb) { - perf_debug("glBlitFramebuffer(): missing src renderbuffer. " - "Falling back to software rendering.\n"); - return mask; - } - - /* If the source and destination are the same size with no mirroring, - * the rectangles are within the size of the texture and there is no - * scissor, then we can probably use the blit engine. - */ - if (!(srcX0 - srcX1 == dstX0 - dstX1 && - srcY0 - srcY1 == dstY0 - dstY1 && - srcX1 >= srcX0 && - srcY1 >= srcY0 && - srcX0 >= 0 && srcX1 <= readFb->Width && - srcY0 >= 0 && srcY1 <= readFb->Height && - dstX0 >= 0 && dstX1 <= drawFb->Width && - dstY0 >= 0 && dstY1 <= drawFb->Height && - !(ctx->Scissor.EnableFlags))) { - perf_debug("glBlitFramebuffer(): non-1:1 blit. " - "Falling back to software rendering.\n"); - return mask; - } - - /* Blit to all active draw buffers. 
We don't do any pre-checking, - * because we assume that copying to MRTs is rare, and failure midway - * through copying is even more rare. Even if it was to occur, it's - * safe to let meta start the copy over from scratch, because - * glBlitFramebuffer completely overwrites the destination pixels, and - * results are undefined if any destination pixels have a dependency on - * source pixels. - */ - for (i = 0; i < drawFb->_NumColorDrawBuffers; i++) { - struct gl_renderbuffer *dst_rb = drawFb->_ColorDrawBuffers[i]; - struct brw_renderbuffer *dst_irb = brw_renderbuffer(dst_rb); - - if (!dst_irb) { - perf_debug("glBlitFramebuffer(): missing dst renderbuffer. " - "Falling back to software rendering.\n"); - return mask; - } - - if (ctx->Color.sRGBEnabled && - _mesa_is_format_srgb(src_irb->mt->format) != - _mesa_is_format_srgb(dst_irb->mt->format)) { - perf_debug("glBlitFramebuffer() with sRGB conversion cannot be " - "handled by BLT path.\n"); - return mask; - } - - if (!brw_miptree_blit(brw, - src_irb->mt, - src_irb->mt_level, src_irb->mt_layer, - srcX0, srcY0, readFb->FlipY, - dst_irb->mt, - dst_irb->mt_level, dst_irb->mt_layer, - dstX0, dstY0, drawFb->FlipY, - dstX1 - dstX0, dstY1 - dstY0, - COLOR_LOGICOP_COPY)) { - perf_debug("glBlitFramebuffer(): unknown blit failure. " - "Falling back to software rendering.\n"); - return mask; - } - } - - mask &= ~GL_COLOR_BUFFER_BIT; - } - - return mask; -} - -static void -brw_blit_framebuffer(struct gl_context *ctx, - struct gl_framebuffer *readFb, - struct gl_framebuffer *drawFb, - GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, - GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, - GLbitfield mask, GLenum filter) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* Page 679 of OpenGL 4.4 spec says: - * "Added BlitFramebuffer to commands affected by conditional rendering in - * section 10.10 (Bug 9562)." - */ - if (!_mesa_check_conditional_render(ctx)) - return; - - if (devinfo->ver < 6) { - /* On gfx4-5, try BLT first. - * - * Gfx4-5 have a single ring for both 3D and BLT operations, so there's - * no inter-ring synchronization issues like on Gfx6+. It is apparently - * faster than using the 3D pipeline. Original Gfx4 also has to rebase - * and copy miptree slices in order to render to unaligned locations. - */ - mask = brw_blit_framebuffer_with_blitter(ctx, readFb, drawFb, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - mask); - if (mask == 0x0) - return; - } - - mask = brw_blorp_framebuffer(brw, readFb, drawFb, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - mask, filter); - if (mask == 0x0) - return; - - /* brw_blorp_framebuffer should always be successful for color blits. */ - assert(!(mask & GL_COLOR_BUFFER_BIT)); - - mask = _mesa_meta_BlitFramebuffer(ctx, readFb, drawFb, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - mask, filter); - if (mask == 0x0) - return; - - if (devinfo->ver >= 8 && (mask & GL_STENCIL_BUFFER_BIT)) { - assert(!"Invalid blit"); - } - - _swrast_BlitFramebuffer(ctx, readFb, drawFb, - srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1, - mask, filter); -} - -/** - * Does the renderbuffer have hiz enabled? 
- */ -bool -brw_renderbuffer_has_hiz(struct brw_renderbuffer *irb) -{ - return brw_miptree_level_has_hiz(irb->mt, irb->mt_level); -} - -void -brw_renderbuffer_move_to_temp(struct brw_context *brw, - struct brw_renderbuffer *irb, - bool invalidate) -{ - struct gl_renderbuffer *rb =&irb->Base.Base; - struct brw_texture_image *intel_image = brw_texture_image(rb->TexImage); - struct brw_mipmap_tree *new_mt; - int width, height, depth; - - brw_get_image_dims(rb->TexImage, &width, &height, &depth); - - assert(irb->align_wa_mt == NULL); - new_mt = brw_miptree_create(brw, GL_TEXTURE_2D, - intel_image->base.Base.TexFormat, - 0, 0, - width, height, 1, - irb->mt->surf.samples, - MIPTREE_CREATE_BUSY); - - if (!invalidate) { - brw_miptree_copy_slice(brw, intel_image->mt, - intel_image->base.Base.Level, irb->mt_layer, - new_mt, 0, 0); - } - - brw_miptree_reference(&irb->align_wa_mt, new_mt); - brw_miptree_release(&new_mt); - - irb->draw_x = 0; - irb->draw_y = 0; -} - -void -brw_cache_sets_clear(struct brw_context *brw) -{ - hash_table_foreach(brw->render_cache, render_entry) - _mesa_hash_table_remove(brw->render_cache, render_entry); - - set_foreach(brw->depth_cache, depth_entry) - _mesa_set_remove(brw->depth_cache, depth_entry); -} - -/** - * Emits an appropriate flush for a BO if it has been rendered to within the - * same batchbuffer as a read that's about to be emitted. - * - * The GPU has separate, incoherent caches for the render cache and the - * sampler cache, along with other caches. Usually data in the different - * caches don't interact (e.g. we don't render to our driver-generated - * immediate constant data), but for render-to-texture in FBOs we definitely - * do. When a batchbuffer is flushed, the kernel will ensure that everything - * necessary is flushed before another use of that BO, but for reuse from - * different caches within a batchbuffer, it's all our responsibility. - */ -static void -flush_depth_and_render_caches(struct brw_context *brw, struct brw_bo *bo) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->ver >= 6) { - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_CS_STALL); - - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_CONST_CACHE_INVALIDATE); - } else { - brw_emit_mi_flush(brw); - } - - brw_cache_sets_clear(brw); -} - -void -brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo) -{ - if (_mesa_hash_table_search(brw->render_cache, bo) || - _mesa_set_search(brw->depth_cache, bo)) - flush_depth_and_render_caches(brw, bo); -} - -static void * -format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage) -{ - return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage); -} - -void -brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo, - enum isl_format format, - enum isl_aux_usage aux_usage) -{ - if (_mesa_set_search(brw->depth_cache, bo)) - flush_depth_and_render_caches(brw, bo); - - /* Check to see if this bo has been used by a previous rendering operation - * but with a different format or aux usage. If it has, flush the render - * cache so we ensure that it's only in there with one format or aux usage - * at a time. - * - * Even though it's not obvious, this can easily happen in practice. - * Suppose a client is blending on a surface with sRGB encode enabled on - * gfx9. This implies that you get AUX_USAGE_CCS_D at best. 
If the client - * then disables sRGB decode and continues blending we will flip on - * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is - * perfectly valid since CCS_E is a subset of CCS_D). However, this means - * that we have fragments in-flight which are rendering with UNORM+CCS_E - * and other fragments in-flight with SRGB+CCS_D on the same surface at the - * same time and the pixel scoreboard and color blender are trying to sort - * it all out. This ends badly (i.e. GPU hangs). - * - * To date, we have never observed GPU hangs or even corruption to be - * associated with switching the format, only the aux usage. However, - * there are comments in various docs which indicate that the render cache - * isn't 100% resilient to format changes. We may as well be conservative - * and flush on format changes too. We can always relax this later if we - * find it to be a performance problem. - */ - struct hash_entry *entry = _mesa_hash_table_search(brw->render_cache, bo); - if (entry && entry->data != format_aux_tuple(format, aux_usage)) - flush_depth_and_render_caches(brw, bo); -} - -void -brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo, - enum isl_format format, - enum isl_aux_usage aux_usage) -{ -#ifndef NDEBUG - struct hash_entry *entry = _mesa_hash_table_search(brw->render_cache, bo); - if (entry) { - /* Otherwise, someone didn't do a flush_for_render and that would be - * very bad indeed. - */ - assert(entry->data == format_aux_tuple(format, aux_usage)); - } -#endif - - _mesa_hash_table_insert(brw->render_cache, bo, - format_aux_tuple(format, aux_usage)); -} - -void -brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo) -{ - if (_mesa_hash_table_search(brw->render_cache, bo)) - flush_depth_and_render_caches(brw, bo); -} - -void -brw_depth_cache_add_bo(struct brw_context *brw, struct brw_bo *bo) -{ - _mesa_set_add(brw->depth_cache, bo); -} - -/** - * Do one-time context initializations related to GL_EXT_framebuffer_object. - * Hook in device driver functions. - */ -void -brw_fbo_init(struct brw_context *brw) -{ - struct dd_function_table *dd = &brw->ctx.Driver; - dd->NewRenderbuffer = brw_new_renderbuffer; - dd->MapRenderbuffer = brw_map_renderbuffer; - dd->UnmapRenderbuffer = brw_unmap_renderbuffer; - dd->RenderTexture = brw_render_texture; - dd->ValidateFramebuffer = brw_validate_framebuffer; - dd->BlitFramebuffer = brw_blit_framebuffer; - dd->EGLImageTargetRenderbufferStorage = - brw_image_target_renderbuffer_storage; - - brw->render_cache = _mesa_hash_table_create(brw->mem_ctx, _mesa_hash_pointer, - _mesa_key_pointer_equal); - brw->depth_cache = _mesa_set_create(brw->mem_ctx, _mesa_hash_pointer, - _mesa_key_pointer_equal); - util_dynarray_init(&brw->batch.exec_fences, NULL); -} diff --git a/src/mesa/drivers/dri/i965/brw_fbo.h b/src/mesa/drivers/dri/i965/brw_fbo.h deleted file mode 100644 index b4dcb9a..0000000 --- a/src/mesa/drivers/dri/i965/brw_fbo.h +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * All Rights Reserved. 
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef BRW_FBO_H
-#define BRW_FBO_H
-
-#include <stdbool.h>
-#include <assert.h>
-#include "main/formats.h"
-#include "main/macros.h"
-#include "brw_context.h"
-#include "brw_mipmap_tree.h"
-#include "brw_screen.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct brw_mipmap_tree;
-
-/**
- * Intel renderbuffer, derived from gl_renderbuffer.
- */
-struct brw_renderbuffer
-{
-   struct swrast_renderbuffer Base;
-   /**
-    * The real renderbuffer storage.
-    *
-    * This is multisampled if NumSamples is > 1.
-    */
-   struct brw_mipmap_tree *mt;
-
-   /**
-    * Downsampled contents for window-system MSAA renderbuffers.
-    *
-    * For window system MSAA color buffers, the singlesample_mt is shared with
-    * other processes in DRI2 (and in DRI3, it's the image buffer managed by
-    * glx_dri3.c), while mt is private to our process.  To do a swapbuffers,
-    * we have to downsample out of mt into singlesample_mt.  For depth and
-    * stencil buffers, the singlesample_mt is also private, and since we don't
-    * expect to need to do resolves (except if someone does a glReadPixels()
-    * or glCopyTexImage()), we just temporarily allocate singlesample_mt when
-    * asked to map the renderbuffer.
-    */
-   struct brw_mipmap_tree *singlesample_mt;
-
-   /* Gen < 6 doesn't have a layer specifier for render targets or depth.
-    * The driver needs to manually offset surfaces to the correct
-    * level/layer.  There are, however, alignment restrictions to respect as
-    * well, and in some cases the only option is to use a temporary
-    * single-slice surface which the driver copies after rendering to the
-    * full miptree.
-    *
-    * See brw_renderbuffer_move_to_temp().
-    */
-   struct brw_mipmap_tree *align_wa_mt;
-
-   /**
-    * \name Miptree view
-    * \{
-    *
-    * Multiple renderbuffers may simultaneously wrap a single texture and each
-    * provide a different view into that texture.  The fields below indicate
-    * which miptree slice is wrapped by this renderbuffer.  The fields' values
-    * are consistent with the 'level' and 'layer' parameters of
-    * glFramebufferTextureLayer().
-    *
-    * For renderbuffers not created with glFramebufferTexture*(), mt_level and
-    * mt_layer are 0.
-    */
-   unsigned int mt_level;
-   unsigned int mt_layer;
-
-   /* The number of attached logical layers.
*/
-   unsigned int layer_count;
-   /** \} */
-
-   GLuint draw_x, draw_y; /**< Offset of drawing within the region */
-
-   /**
-    * Set to true at every draw call, to indicate if a window-system
-    * renderbuffer needs to be downsampled before using singlesample_mt.
-    */
-   bool need_downsample;
-
-   /**
-    * Set to true when doing a brw_renderbuffer_map()/unmap() that requires
-    * an upsample at the end.
-    */
-   bool need_map_upsample;
-
-   /**
-    * Set to true if singlesample_mt is temporary storage that persists only
-    * for the duration of a mapping.
-    */
-   bool singlesample_mt_is_tmp;
-
-   /**
-    * Set to true when the application specifically asked for an sRGB visual.
-    */
-   bool need_srgb;
-};
-
-
-/**
- * gl_renderbuffer is a base class which we subclass.  The Class field
- * is used for simple run-time type checking.
- */
-#define INTEL_RB_CLASS 0x12345678
-
-
-/**
- * Return a gl_renderbuffer pointer cast to brw_renderbuffer.
- * NULL will be returned if the rb isn't really a brw_renderbuffer.
- * This is determined by checking the ClassID.
- */
-static inline struct brw_renderbuffer *
-brw_renderbuffer(struct gl_renderbuffer *rb)
-{
-   struct brw_renderbuffer *irb = (struct brw_renderbuffer *) rb;
-   if (irb && irb->Base.Base.ClassID == INTEL_RB_CLASS)
-      return irb;
-   else
-      return NULL;
-}
-
-static inline struct brw_mipmap_tree *
-brw_renderbuffer_get_mt(struct brw_renderbuffer *irb)
-{
-   if (!irb)
-      return NULL;
-
-   return (irb->align_wa_mt) ? irb->align_wa_mt : irb->mt;
-}
-
-/**
- * \brief Return the framebuffer attachment specified by attIndex.
- *
- * If the framebuffer lacks the specified attachment, then return null.
- *
- * If the attached renderbuffer is a wrapper, then return the wrapped
- * renderbuffer.
- */
-static inline struct brw_renderbuffer *
-brw_get_renderbuffer(struct gl_framebuffer *fb, gl_buffer_index attIndex)
-{
-   struct gl_renderbuffer *rb;
-
-   assert((unsigned)attIndex < ARRAY_SIZE(fb->Attachment));
-
-   rb = fb->Attachment[attIndex].Renderbuffer;
-   if (!rb)
-      return NULL;
-
-   return brw_renderbuffer(rb);
-}
-
-
-static inline mesa_format
-brw_rb_format(const struct brw_renderbuffer *rb)
-{
-   return rb->Base.Base.Format;
-}
-
-extern struct brw_renderbuffer *
-brw_create_winsys_renderbuffer(struct brw_screen *screen,
-                               mesa_format format, unsigned num_samples);
-
-struct brw_renderbuffer *
-brw_create_private_renderbuffer(struct brw_screen *screen,
-                                mesa_format format, unsigned num_samples);
-
-struct gl_renderbuffer*
-brw_create_wrapped_renderbuffer(struct gl_context *ctx,
-                                int width, int height,
-                                mesa_format format);
-
-extern void
-brw_fbo_init(struct brw_context *brw);
-
-void
-brw_renderbuffer_set_draw_offset(struct brw_renderbuffer *irb);
-
-static inline uint32_t
-brw_renderbuffer_get_tile_offsets(struct brw_renderbuffer *irb,
-                                  uint32_t *tile_x,
-                                  uint32_t *tile_y)
-{
-   if (irb->align_wa_mt) {
-      *tile_x = 0;
-      *tile_y = 0;
-      return 0;
-   }
-
-   return brw_miptree_get_tile_offsets(irb->mt, irb->mt_level, irb->mt_layer,
-                                       tile_x, tile_y);
-}
-
-bool
-brw_renderbuffer_has_hiz(struct brw_renderbuffer *irb);
-
-
-void brw_renderbuffer_move_to_temp(struct brw_context *brw,
-                                   struct brw_renderbuffer *irb,
-                                   bool invalidate);
-
-void
-brw_renderbuffer_downsample(struct brw_context *brw,
-                            struct brw_renderbuffer *irb);
-
-void
-brw_renderbuffer_upsample(struct brw_context *brw,
-                          struct brw_renderbuffer *irb);
-
-void brw_cache_sets_clear(struct brw_context *brw);
-void brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo);
-void brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo,
-                                enum isl_format format,
-                                enum isl_aux_usage aux_usage);
-void brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo);
-void brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo,
-                             enum isl_format format,
-                             enum isl_aux_usage aux_usage);
-void brw_depth_cache_add_bo(struct brw_context *brw, struct brw_bo *bo);
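-
-/* Editor's note: a minimal usage sketch, not part of the original driver.
- * It shows how the cache-tracking helpers above are meant to be driven when
- * a BO is rendered to and then sampled within the same batch; the function
- * name and call site are hypothetical, only the brw_cache_*() and
- * brw_*_cache_add_bo() entry points are real.
- */
-static inline void
-brw_example_render_then_sample(struct brw_context *brw, struct brw_bo *bo,
-                               enum isl_format format,
-                               enum isl_aux_usage aux_usage)
-{
-   /* Before rendering to 'bo': flush if it is in the depth cache, or if it
-    * sits in the render cache with a different (format, aux usage) pair,
-    * then record the pair it is about to be rendered with.
-    */
-   brw_cache_flush_for_render(brw, bo, format, aux_usage);
-   brw_render_cache_add_bo(brw, bo, format, aux_usage);
-
-   /* ... draws targeting 'bo' are emitted here ... */
-
-   /* Before sampling 'bo' in the same batch: the texture cache is not
-    * coherent with the render cache, so this emits the required flushes
-    * and clears the tracking sets.
-    */
-   brw_cache_flush_for_read(brw, bo);
-}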
-
-unsigned
-brw_quantize_num_samples(struct brw_screen *intel, unsigned num_samples);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* BRW_FBO_H */
diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.c b/src/mesa/drivers/dri/i965/brw_ff_gs.c
deleted file mode 100644
index 93bbc9e..0000000
--- a/src/mesa/drivers/dri/i965/brw_ff_gs.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "main/macros.h" -#include "main/enums.h" -#include "main/transformfeedback.h" - -#include "brw_batch.h" - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_util.h" -#include "brw_state.h" -#include "brw_ff_gs.h" -#include "util/ralloc.h" - -static void -compile_ff_gs_prog(struct brw_context *brw, - struct brw_ff_gs_prog_key *key) -{ - const GLuint *program; - void *mem_ctx; - GLuint program_size; - - mem_ctx = ralloc_context(NULL); - - struct brw_ff_gs_prog_data prog_data; - program = brw_compile_ff_gs_prog(brw->screen->compiler, mem_ctx, key, - &prog_data, - &brw_vue_prog_data(brw->vs.base.prog_data)->vue_map, - &program_size); - - brw_upload_cache(&brw->cache, BRW_CACHE_FF_GS_PROG, - key, sizeof(*key), - program, program_size, - &prog_data, sizeof(prog_data), - &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data); - ralloc_free(mem_ctx); -} - -static bool -brw_ff_gs_state_dirty(const struct brw_context *brw) -{ - return brw_state_dirty(brw, - _NEW_LIGHT, - BRW_NEW_PRIMITIVE | - BRW_NEW_TRANSFORM_FEEDBACK | - BRW_NEW_VS_PROG_DATA); -} - -static void -brw_ff_gs_populate_key(struct brw_context *brw, - struct brw_ff_gs_prog_key *key) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - static const unsigned swizzle_for_offset[4] = { - BRW_SWIZZLE4(0, 1, 2, 3), - BRW_SWIZZLE4(1, 2, 3, 3), - BRW_SWIZZLE4(2, 3, 3, 3), - BRW_SWIZZLE4(3, 3, 3, 3) - }; - - struct gl_context *ctx = &brw->ctx; - - assert(devinfo->ver < 7); - - memset(key, 0, sizeof(*key)); - - /* BRW_NEW_VS_PROG_DATA (part of VUE map) */ - key->attrs = brw_vue_prog_data(brw->vs.base.prog_data)->vue_map.slots_valid; - - /* BRW_NEW_PRIMITIVE */ - key->primitive = brw->primitive; - - /* _NEW_LIGHT */ - key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); - if (key->primitive == _3DPRIM_QUADLIST && ctx->Light.ShadeModel != GL_FLAT) { - /* Provide consistent primitive order with brw_set_prim's - * optimization of single quads to trifans. - */ - key->pv_first = true; - } - - if (devinfo->ver == 6) { - /* On Gfx6, GS is used for transform feedback. */ - /* BRW_NEW_TRANSFORM_FEEDBACK */ - if (_mesa_is_xfb_active_and_unpaused(ctx)) { - const struct gl_program *prog = - ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; - const struct gl_transform_feedback_info *linked_xfb_info = - prog->sh.LinkedTransformFeedback; - int i; - - /* Make sure that the VUE slots won't overflow the unsigned chars in - * key->transform_feedback_bindings[]. - */ - STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256); - - /* Make sure that we don't need more binding table entries than we've - * set aside for use in transform feedback. (We shouldn't, since we - * set aside enough binding table entries to have one per component). - */ - assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS); - - key->need_gs_prog = true; - key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs; - for (i = 0; i < key->num_transform_feedback_bindings; ++i) { - key->transform_feedback_bindings[i] = - linked_xfb_info->Outputs[i].OutputRegister; - key->transform_feedback_swizzles[i] = - swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; - } - } - } else { - /* Pre-gfx6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP - * into simpler primitives. 
- */
-      key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST ||
-                           brw->primitive == _3DPRIM_QUADSTRIP ||
-                           brw->primitive == _3DPRIM_LINELOOP);
-   }
-}
-
-/* Upload the fixed-function GS program needed by the current primitive and
- * transform feedback state, compiling it if the program cache doesn't
- * already hold it.
- */
-void
-brw_upload_ff_gs_prog(struct brw_context *brw)
-{
-   struct brw_ff_gs_prog_key key;
-
-   if (!brw_ff_gs_state_dirty(brw))
-      return;
-
-   /* Populate the key:
-    */
-   brw_ff_gs_populate_key(brw, &key);
-
-   if (brw->ff_gs.prog_active != key.need_gs_prog) {
-      brw->ctx.NewDriverState |= BRW_NEW_FF_GS_PROG_DATA;
-      brw->ff_gs.prog_active = key.need_gs_prog;
-   }
-
-   if (brw->ff_gs.prog_active) {
-      if (!brw_search_cache(&brw->cache, BRW_CACHE_FF_GS_PROG, &key,
-                            sizeof(key), &brw->ff_gs.prog_offset,
-                            &brw->ff_gs.prog_data, true)) {
-         compile_ff_gs_prog(brw, &key);
-      }
-   }
-}
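-
-/* Editor's note (not in the original file): program-cache keys like the one
- * above are hashed and compared as raw bytes, which is why
- * brw_ff_gs_populate_key() starts with memset(key, 0, sizeof(*key)) --
- * uninitialized padding bytes would make two logically identical keys miss
- * each other in brw_search_cache() and force a redundant recompile.
- */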
diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.h b/src/mesa/drivers/dri/i965/brw_ff_gs.h
deleted file mode 100644
index e35c9d3..0000000
--- a/src/mesa/drivers/dri/i965/brw_ff_gs.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell
-  */
-
-
-#ifndef BRW_GS_H
-#define BRW_GS_H
-
-#include "brw_context.h"
-#include "compiler/brw_eu.h"
-
-void
-brw_upload_ff_gs_prog(struct brw_context *brw);
-
-#endif
diff --git a/src/mesa/drivers/dri/i965/brw_formatquery.c b/src/mesa/drivers/dri/i965/brw_formatquery.c
deleted file mode 100644
index 67d603f..0000000
--- a/src/mesa/drivers/dri/i965/brw_formatquery.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_context.h"
-#include "brw_state.h"
-#include "main/context.h"
-#include "main/formatquery.h"
-#include "main/glformats.h"
-
-static size_t
-brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
-                             GLenum internalFormat, int samples[16])
-{
-   struct brw_context *brw = brw_context(ctx);
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   (void) target;
-   (void) internalFormat;
-
-   switch (devinfo->ver) {
-   case 11:
-   case 10:
-   case 9:
-      samples[0] = 16;
-      samples[1] = 8;
-      samples[2] = 4;
-      samples[3] = 2;
-      return 4;
-
-   case 8:
-      samples[0] = 8;
-      samples[1] = 4;
-      samples[2] = 2;
-      return 3;
-
-   case 7:
-      if (internalFormat == GL_RGBA32F && _mesa_is_gles(ctx)) {
-         /* For GLES, we are allowed to return a smaller number of samples for
-          * GL_RGBA32F.  See OpenGLES 3.2 spec, section 20.3.1 Internal Format
-          * Query Parameters, under SAMPLES:
-          *
-          *   "A value less than or equal to the value of MAX_SAMPLES, if
-          *    internalformat is RGBA16F, R32F, RG32F, or RGBA32F."
-          *
-          * In brw_render_target_supported, we prevent formats with a size
-          * greater than 8 bytes from using 8x MSAA on gfx7.
-          */
-         samples[0] = 4;
-         return 1;
-      } else {
-         samples[0] = 8;
-         samples[1] = 4;
-         return 2;
-      }
-
-   case 6:
-      samples[0] = 4;
-      return 1;
-
-   default:
-      assert(devinfo->ver < 6);
-      samples[0] = 1;
-      return 1;
-   }
-}
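-
-/* Editor's illustration (hypothetical caller, not part of the original
- * driver): what the helper above produces.  On a Gfx8 part it fills the
- * first three slots and returns 3, so GL_NUM_SAMPLE_COUNTS reports 3 and
- * GL_SAMPLES returns the same counts in descending order.
- */
-static inline void
-brw_example_msaa_counts(struct gl_context *ctx)
-{
-   int counts[16];  /* callers always provide at least 16 elements */
-   size_t n = brw_query_samples_for_format(ctx, GL_TEXTURE_2D_MULTISAMPLE,
-                                           GL_RGBA8, counts);
-   /* On Gfx8: n == 3 and counts[0..2] == { 8, 4, 2 }. */
-   (void) n;
-}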
-
-void
-brw_query_internal_format(struct gl_context *ctx, GLenum target,
-                          GLenum internalFormat, GLenum pname, GLint *params)
-{
-   /* The Mesa layer gives us a temporary params buffer that is guaranteed
-    * to be non-NULL, and have at least 16 elements.
-    */
-   assert(params != NULL);
-
-   switch (pname) {
-   case GL_SAMPLES:
-      brw_query_samples_for_format(ctx, target, internalFormat, params);
-      break;
-
-   case GL_NUM_SAMPLE_COUNTS: {
-      size_t num_samples;
-      GLint dummy_buffer[16];
-
-      num_samples = brw_query_samples_for_format(ctx, target, internalFormat,
-                                                 dummy_buffer);
-      params[0] = (GLint) num_samples;
-      break;
-   }
-
-   default:
-      /* By default, we call the driver hook's fallback function from the
-       * frontend, which has a generic implementation for all pnames.
-       */
-      _mesa_query_internal_format_default(ctx, target, internalFormat, pname,
-                                          params);
-      break;
-   }
-}
diff --git a/src/mesa/drivers/dri/i965/brw_generate_mipmap.c b/src/mesa/drivers/dri/i965/brw_generate_mipmap.c
deleted file mode 100644
index e533f84..0000000
--- a/src/mesa/drivers/dri/i965/brw_generate_mipmap.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "main/mipmap.h"
-#include "main/teximage.h"
-#include "brw_blorp.h"
-#include "brw_context.h"
-#include "brw_tex.h"
-#include "drivers/common/meta.h"
-
-#define FILE_DEBUG_FLAG DEBUG_BLORP
-
-
-/**
- * The GenerateMipmap() driver hook.
- */
-void
-brw_generate_mipmap(struct gl_context *ctx, GLenum target,
-                    struct gl_texture_object *tex_obj)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct intel_device_info *devinfo = &brw->screen->devinfo;
-   struct brw_texture_object *intel_obj = brw_texture_object(tex_obj);
-   const unsigned base_level = tex_obj->Attrib.BaseLevel;
-   unsigned last_level, first_layer, last_layer;
-
-   /* Blorp doesn't handle combined depth/stencil surfaces on Gfx4-5 yet. */
-   if (devinfo->ver <= 5 &&
-       (tex_obj->Image[0][base_level]->_BaseFormat == GL_DEPTH_COMPONENT ||
-        tex_obj->Image[0][base_level]->_BaseFormat == GL_DEPTH_STENCIL)) {
-      _mesa_meta_GenerateMipmap(ctx, target, tex_obj);
-      return;
-   }
-
-   /* Find the expected last mipmap level to generate. */
-   last_level = _mesa_compute_num_levels(ctx, tex_obj, target) - 1;
-
-   if (last_level == 0)
-      return;
-
-   /* The texture isn't in a "complete" state yet so set the expected
-    * last_level here; we're not going through normal texture validation.
-    */
-   intel_obj->_MaxLevel = last_level;
-
-   if (!tex_obj->Immutable) {
-      _mesa_prepare_mipmap_levels(ctx, tex_obj, base_level, last_level);
-
-      /* At this point, memory for all the texture levels has been
-       * allocated.  However, the base level image may be in one resource
-       * while the subsequent/smaller levels may be in another resource.
-       * Finalizing the texture will copy the base images from the former
-       * resource to the latter.
-       *
-       * After this, we'll have all mipmap levels in one resource.
- */ - brw_finalize_mipmap_tree(brw, tex_obj); - } - - struct brw_mipmap_tree *mt = intel_obj->mt; - if (!mt) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "mipmap generation"); - return; - } - - const mesa_format format = intel_obj->_Format; - - /* Fall back to the CPU for non-renderable cases. - * - * TODO: 3D textures require blending data from multiple slices, - * which means we need custom shaders. For now, fall back. - */ - if (!brw->mesa_format_supports_render[format] || target == GL_TEXTURE_3D) { - _mesa_generate_mipmap(ctx, target, tex_obj); - return; - } - - const struct isl_extent4d *base_size = &mt->surf.logical_level0_px; - - if (mt->target == GL_TEXTURE_CUBE_MAP) { - first_layer = _mesa_tex_target_to_face(target); - last_layer = first_layer; - } else { - first_layer = 0; - last_layer = base_size->array_len - 1; - } - - /* The GL_EXT_texture_sRGB_decode extension's issues section says: - * - * "10) How is mipmap generation of sRGB textures affected by the - * TEXTURE_SRGB_DECODE_EXT parameter? - * - * RESOLVED: When the TEXTURE_SRGB_DECODE parameter is DECODE_EXT - * for an sRGB texture, mipmap generation should decode sRGB texels - * to a linear RGB color space, perform downsampling, then encode - * back to an sRGB color space. (Issue 24 in the EXT_texture_sRGB - * specification provides a rationale for why.) When the parameter - * is SKIP_DECODE_EXT instead, mipmap generation skips the encode - * and decode steps during mipmap generation. By skipping the - * encode and decode steps, sRGB mipmap generation should match - * the mipmap generation for a non-sRGB texture." - */ - bool do_srgb = tex_obj->Sampler.Attrib.sRGBDecode == GL_DECODE_EXT; - - for (unsigned dst_level = base_level + 1; - dst_level <= last_level; - dst_level++) { - - const unsigned src_level = dst_level - 1; - - for (unsigned layer = first_layer; layer <= last_layer; layer++) { - brw_blorp_blit_miptrees(brw, mt, src_level, layer, format, - SWIZZLE_XYZW, mt, dst_level, layer, format, - 0, 0, - minify(base_size->width, src_level), - minify(base_size->height, src_level), - 0, 0, - minify(base_size->width, dst_level), - minify(base_size->height, dst_level), - GL_LINEAR, false, false, - do_srgb, do_srgb); - } - } -} diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c deleted file mode 100644 index b3acdec..0000000 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file brw_gs.c
- *
- * State atom for client-programmable geometry shaders, and support code.
- */
-
-#include "brw_gs.h"
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_ff_gs.h"
-#include "compiler/brw_nir.h"
-#include "brw_program.h"
-#include "compiler/glsl/ir_uniform.h"
-
-static void
-assign_gs_binding_table_offsets(const struct intel_device_info *devinfo,
-                                const struct gl_program *prog,
-                                struct brw_gs_prog_data *prog_data)
-{
-   /* In gfx6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform
-    * feedback surfaces.
-    */
-   uint32_t reserved = devinfo->ver == 6 ? BRW_MAX_SOL_BINDINGS : 0;
-
-   brw_assign_common_binding_table_offsets(devinfo, prog,
-                                           &prog_data->base.base, reserved);
-}
-
-static void
-brw_gfx6_xfb_setup(const struct gl_transform_feedback_info *linked_xfb_info,
-                   struct brw_gs_prog_data *gs_prog_data)
-{
-   static const unsigned swizzle_for_offset[4] = {
-      BRW_SWIZZLE4(0, 1, 2, 3),
-      BRW_SWIZZLE4(1, 2, 3, 3),
-      BRW_SWIZZLE4(2, 3, 3, 3),
-      BRW_SWIZZLE4(3, 3, 3, 3)
-   };
-
-   int i;
-
-   /* Make sure that the VUE slots won't overflow the unsigned chars in
-    * prog_data->transform_feedback_bindings[].
-    */
-   STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);
-
-   /* Make sure that we don't need more binding table entries than we've
-    * set aside for use in transform feedback.  (We shouldn't, since we
-    * set aside enough binding table entries to have one per component).
-    */
-   assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
-
-   gs_prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
-   for (i = 0; i < gs_prog_data->num_transform_feedback_bindings; i++) {
-      gs_prog_data->transform_feedback_bindings[i] =
-         linked_xfb_info->Outputs[i].OutputRegister;
-      gs_prog_data->transform_feedback_swizzles[i] =
-         swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
-   }
-}
-
-static bool
-brw_codegen_gs_prog(struct brw_context *brw,
-                    struct brw_program *gp,
-                    struct brw_gs_prog_key *key)
-{
-   struct brw_compiler *compiler = brw->screen->compiler;
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-   struct brw_stage_state *stage_state = &brw->gs.base;
-   struct brw_gs_prog_data prog_data;
-   bool start_busy = false;
-   double start_time = 0;
-
-   memset(&prog_data, 0, sizeof(prog_data));
-
-   void *mem_ctx = ralloc_context(NULL);
-
-   nir_shader *nir = nir_shader_clone(mem_ctx, gp->program.nir);
-
-   assign_gs_binding_table_offsets(devinfo, &gp->program, &prog_data);
-
-   brw_nir_setup_glsl_uniforms(mem_ctx, nir, &gp->program,
-                               &prog_data.base.base,
-                               compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
-   if (brw->can_push_ubos) {
-      brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
-                                 prog_data.base.base.ubo_ranges);
-   }
-
-   uint64_t outputs_written = nir->info.outputs_written;
-
-   brw_compute_vue_map(devinfo,
-                       &prog_data.base.vue_map, outputs_written,
-                       gp->program.info.separate_shader, 1);
-
-   if (devinfo->ver == 6)
-      brw_gfx6_xfb_setup(gp->program.sh.LinkedTransformFeedback,
-                         &prog_data);
-
-   int st_index = -1;
-   if (INTEL_DEBUG(DEBUG_SHADER_TIME))
-      st_index = brw_get_shader_time_index(brw, &gp->program, ST_GS, true);
-
-   if (unlikely(brw->perf_debug)) {
-      start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo);
-      start_time
= get_time(); - } - - char *error_str; - const unsigned *program = - brw_compile_gs(brw->screen->compiler, brw, mem_ctx, key, - &prog_data, nir, st_index, - NULL, &error_str); - if (program == NULL) { - ralloc_strcat(&gp->program.sh.data->InfoLog, error_str); - _mesa_problem(NULL, "Failed to compile geometry shader: %s\n", error_str); - - ralloc_free(mem_ctx); - return false; - } - - if (unlikely(brw->perf_debug)) { - if (gp->compiled_once) { - brw_debug_recompile(brw, MESA_SHADER_GEOMETRY, gp->program.Id, - &key->base); - } - if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { - perf_debug("GS compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - gp->compiled_once = true; - } - - /* Scratch space is used for register spilling */ - brw_alloc_stage_scratch(brw, stage_state, - prog_data.base.base.total_scratch); - - /* The param and pull_param arrays will be freed by the shader cache. */ - ralloc_steal(NULL, prog_data.base.base.param); - ralloc_steal(NULL, prog_data.base.base.pull_param); - brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG, - key, sizeof(*key), - program, prog_data.base.base.program_size, - &prog_data, sizeof(prog_data), - &stage_state->prog_offset, &brw->gs.base.prog_data); - ralloc_free(mem_ctx); - - return true; -} - -static bool -brw_gs_state_dirty(const struct brw_context *brw) -{ - return brw_state_dirty(brw, - _NEW_TEXTURE, - BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_TRANSFORM_FEEDBACK); -} - -void -brw_gs_populate_key(struct brw_context *brw, - struct brw_gs_prog_key *key) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_program *gp = - (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY]; - - memset(key, 0, sizeof(*key)); - - brw_populate_base_prog_key(ctx, gp, &key->base); -} - -void -brw_upload_gs_prog(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->gs.base; - struct brw_gs_prog_key key; - /* BRW_NEW_GEOMETRY_PROGRAM */ - struct brw_program *gp = - (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY]; - - if (!brw_gs_state_dirty(brw)) - return; - - brw_gs_populate_key(brw, &key); - - if (brw_search_cache(&brw->cache, BRW_CACHE_GS_PROG, &key, sizeof(key), - &stage_state->prog_offset, &brw->gs.base.prog_data, - true)) - return; - - if (brw_disk_cache_upload_program(brw, MESA_SHADER_GEOMETRY)) - return; - - gp = (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY]; - gp->id = key.base.program_string_id; - - ASSERTED bool success = brw_codegen_gs_prog(brw, gp, &key); - assert(success); -} - -void -brw_gs_populate_default_key(const struct brw_compiler *compiler, - struct brw_gs_prog_key *key, - struct gl_program *prog) -{ - const struct intel_device_info *devinfo = compiler->devinfo; - - memset(key, 0, sizeof(*key)); - - brw_populate_default_base_prog_key(devinfo, brw_program(prog), - &key->base); -} - -bool -brw_gs_precompile(struct gl_context *ctx, struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_gs_prog_key key; - uint32_t old_prog_offset = brw->gs.base.prog_offset; - struct brw_stage_prog_data *old_prog_data = brw->gs.base.prog_data; - bool success; - - struct brw_program *bgp = brw_program(prog); - - brw_gs_populate_default_key(brw->screen->compiler, &key, prog); - - success = brw_codegen_gs_prog(brw, bgp, &key); - - brw->gs.base.prog_offset = old_prog_offset; - brw->gs.base.prog_data = old_prog_data; - - return success; -} diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h deleted file mode 100644 index 
7dab548..0000000
--- a/src/mesa/drivers/dri/i965/brw_gs.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef BRW_VEC4_GS_H
-#define BRW_VEC4_GS_H
-
-#include <stdbool.h>
-
-#include "brw_context.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct gl_shader_program;
-
-void
-brw_upload_gs_prog(struct brw_context *brw);
-
-void
-brw_gs_populate_key(struct brw_context *brw,
-                    struct brw_gs_prog_key *key);
-void
-brw_gs_populate_default_key(const struct brw_compiler *compiler,
-                            struct brw_gs_prog_key *key,
-                            struct gl_program *prog);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* BRW_VEC4_GS_H */
diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
deleted file mode 100644
index 6f2629e..0000000
--- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "main/mtypes.h"
-#include "program/prog_parameter.h"
-#include "main/shaderapi.h"
-
-#include "brw_context.h"
-#include "brw_state.h"
-
-
-/* Creates a new GS constant buffer reflecting the current GS program's
- * constants, if needed by the GS program.
- *
- * Otherwise, constants go through the CURBEs using the brw_constant_buffer
- * state atom.
- */
-static void
-brw_upload_gs_pull_constants(struct brw_context *brw)
-{
-   struct brw_stage_state *stage_state = &brw->gs.base;
-
-   /* BRW_NEW_GEOMETRY_PROGRAM */
-   struct brw_program *gp =
-      (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY];
-
-   if (!gp)
-      return;
-
-   /* BRW_NEW_GS_PROG_DATA */
-   const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
-
-   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_GEOMETRY);
-   /* _NEW_PROGRAM_CONSTANTS */
-   brw_upload_pull_constants(brw, BRW_NEW_GS_CONSTBUF, &gp->program,
-                             stage_state, prog_data);
-}
-
-const struct brw_tracked_state brw_gs_pull_constants = {
-   .dirty = {
-      .mesa = _NEW_PROGRAM_CONSTANTS,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_GEOMETRY_PROGRAM |
-             BRW_NEW_GS_PROG_DATA,
-   },
-   .emit = brw_upload_gs_pull_constants,
-};
-
-static void
-brw_upload_gs_ubo_surfaces(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-
-   /* _NEW_PROGRAM */
-   struct gl_program *prog =
-      ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
-
-   /* BRW_NEW_GS_PROG_DATA */
-   struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
-
-   brw_upload_ubo_surfaces(brw, prog, &brw->gs.base, prog_data);
-}
-
-const struct brw_tracked_state brw_gs_ubo_surfaces = {
-   .dirty = {
-      .mesa = _NEW_PROGRAM,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_GS_PROG_DATA |
-             BRW_NEW_UNIFORM_BUFFER,
-   },
-   .emit = brw_upload_gs_ubo_surfaces,
-};
-
-static void
-brw_upload_gs_image_surfaces(struct brw_context *brw)
-{
-   /* BRW_NEW_GEOMETRY_PROGRAM */
-   const struct gl_program *gp = brw->programs[MESA_SHADER_GEOMETRY];
-
-   if (gp) {
-      /* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
-      brw_upload_image_surfaces(brw, gp, &brw->gs.base,
-                                brw->gs.base.prog_data);
-   }
-}
-
-const struct brw_tracked_state brw_gs_image_surfaces = {
-   .dirty = {
-      .mesa = _NEW_TEXTURE,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_AUX_STATE |
-             BRW_NEW_GEOMETRY_PROGRAM |
-             BRW_NEW_GS_PROG_DATA |
-             BRW_NEW_IMAGE_UNITS,
-   },
-   .emit = brw_upload_gs_image_surfaces,
-};
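-
-/* Editor's sketch (illustrative, not part of the original file): the shape
- * of the tracked-state pattern used by the three atoms above.  An atom
- * declares which core-GL (.mesa) and driver-internal (.brw) dirty bits it
- * depends on, and the driver's state-upload loop invokes .emit whenever one
- * of those bits is flagged.  'example_emit' and 'example_atom' are
- * hypothetical names.
- */
-static void
-example_emit(struct brw_context *brw)
-{
-   /* Re-emit whatever packets depend on the watched state here. */
-   (void) brw;
-}
-
-static const struct brw_tracked_state example_atom = {
-   .dirty = {
-      .mesa = _NEW_TEXTURE,             /* core-GL state bits */
-      .brw = BRW_NEW_BATCH |            /* driver-internal state bits */
-             BRW_NEW_GEOMETRY_PROGRAM,
-   },
-   .emit = example_emit,
-};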
diff --git a/src/mesa/drivers/dri/i965/brw_image.h b/src/mesa/drivers/dri/i965/brw_image.h
deleted file mode 100644
index 6a976eb..0000000
--- a/src/mesa/drivers/dri/i965/brw_image.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright 2006 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef BRW_IMAGE_H
-#define BRW_IMAGE_H
-
-/** @file brw_image.h
- *
- * Structure definitions and prototypes for __DRIimage, the driver-private
- * structure backing EGLImage or a drawable in DRI3.
- *
- * The __DRIimage is passed around the loader code (src/glx and src/egl), but
- * it's opaque to that code and may only be accessed by loader extensions
- * (mostly located in brw_screen.c).
- */
-
-#include <stdbool.h>
-#include <stdint.h>
-
-#include "main/mtypes.h"
-#include "brw_bufmgr.h"
-#include <GL/internal/dri_interface.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * Used with images created with image_from_names
- * to help support planar images.
- */
-struct brw_image_format {
-   int fourcc;
-   int components;
-   int nplanes;
-   struct {
-      int buffer_index;
-      int width_shift;
-      int height_shift;
-      uint32_t dri_format;
-      int cpp;
-   } planes[3];
-   float scaling_factor;
-};
-
-struct __DRIimageRec {
-   struct brw_screen *screen;
-   struct brw_bo *bo;
-   uint32_t pitch; /**< in bytes */
-   GLenum internal_format;
-   uint32_t dri_format;
-   GLuint format; /**< mesa_format or mesa_array_format */
-   uint64_t modifier; /**< fb modifier (fourcc) */
-   uint32_t offset;
-
-   /*
-    * Need to save these here between calls to
-    * image_from_names and calls to image_from_planar.
-    */
-   uint32_t strides[3];
-   uint32_t offsets[3];
-   const struct brw_image_format *planar_format;
-
-   /* particular miptree level */
-   GLuint width;
-   GLuint height;
-   GLuint tile_x;
-   GLuint tile_y;
-   bool has_depthstencil;
-   bool imported_dmabuf;
-
-   /** Offset of the auxiliary compression surface in the bo. */
-   uint32_t aux_offset;
-
-   /** Pitch of the auxiliary compression surface. */
-   uint32_t aux_pitch;
-
-   /** Total size in bytes of the auxiliary compression surface. */
-   uint32_t aux_size;
-
-   /**
-    * Provided by EGL_EXT_image_dma_buf_import.
-    * \{
-    */
-   enum __DRIYUVColorSpace yuv_color_space;
-   enum __DRISampleRange sample_range;
-   enum __DRIChromaSiting horizontal_siting;
-   enum __DRIChromaSiting vertical_siting;
-   /* \} */
-
-   __DRIscreen *driScrnPriv;
-
-   void *loader_private;
-};
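-
-/* Editor's sketch (illustrative, not part of the original header): the
- * intended use of the per-plane width_shift/height_shift fields above.
- * Each plane stores the image downscaled by a power of two per axis; an
- * NV12-style entry, for instance, would use shifts {0,0} for the Y plane
- * and {1,1} for the interleaved CbCr plane.  The helper name is
- * hypothetical; the real fourcc tables live in the screen code.
- */
-static inline void
-brw_example_plane_dims(const struct brw_image_format *f, int plane,
-                       int width, int height,
-                       int *plane_width, int *plane_height)
-{
-   *plane_width = width >> f->planes[plane].width_shift;
-   *plane_height = height >> f->planes[plane].height_shift;
-}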
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
deleted file mode 100644
index 974543e..0000000
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ /dev/null
@@ -1,401 +0,0 @@
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_context.h"
-#include "compiler/brw_nir.h"
-#include "brw_program.h"
-#include "compiler/glsl/gl_nir.h"
-#include "compiler/glsl/gl_nir_linker.h"
-#include "compiler/glsl/ir.h"
-#include "compiler/glsl/ir_optimization.h"
-#include "compiler/glsl/program.h"
-#include "compiler/nir/nir_serialize.h"
-#include "program/program.h"
-#include "main/glspirv.h"
-#include "main/mtypes.h"
-#include "main/shaderapi.h"
-#include "main/shaderobj.h"
-#include "main/uniforms.h"
-
-/**
- * Performs a compile of the shader stages even when we don't know
- * what non-orthogonal state will be set, in the hope that it reflects
- * the eventual NOS used, and thus allows us to produce link failures.
- */
-static bool
-brw_shader_precompile(struct gl_context *ctx,
-                      struct gl_shader_program *sh_prog)
-{
-   struct gl_linked_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
-   struct gl_linked_shader *tcs = sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
-   struct gl_linked_shader *tes = sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
-   struct gl_linked_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
-   struct gl_linked_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
-   struct gl_linked_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];
-
-   if (fs && !brw_fs_precompile(ctx, fs->Program))
-      return false;
-
-   if (gs && !brw_gs_precompile(ctx, gs->Program))
-      return false;
-
-   if (tes && !brw_tes_precompile(ctx, sh_prog, tes->Program))
-      return false;
-
-   if (tcs && !brw_tcs_precompile(ctx, sh_prog, tcs->Program))
-      return false;
-
-   if (vs && !brw_vs_precompile(ctx, vs->Program))
-      return false;
-
-   if (cs && !brw_cs_precompile(ctx, cs->Program))
-      return false;
-
-   return true;
-}
-
-static void
-brw_lower_packing_builtins(struct brw_context *brw,
-                           exec_list *ir)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   /* Gens < 7 don't have instructions to convert to or from half-precision,
-    * and Gens < 6 don't expose that functionality.
-    */
-   if (devinfo->ver != 6)
-      return;
-
-   lower_packing_builtins(ir, LOWER_PACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16);
-}
-
-static void
-process_glsl_ir(struct brw_context *brw,
-                struct gl_shader_program *shader_prog,
-                struct gl_linked_shader *shader)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-   struct gl_context *ctx = &brw->ctx;
-
-   /* Temporary memory context for any new IR. */
-   void *mem_ctx = ralloc_context(NULL);
-
-   ralloc_adopt(mem_ctx, shader->ir);
-
-   if (shader->Stage == MESA_SHADER_FRAGMENT) {
-      lower_blend_equation_advanced(
-         shader, ctx->Extensions.KHR_blend_equation_advanced_coherent);
-   }
-
-   /* lower_packing_builtins() inserts arithmetic instructions, so it
-    * must precede lower_instructions().
-    */
-   brw_lower_packing_builtins(brw, shader->ir);
-   do_mat_op_to_vec(shader->ir);
-
-   unsigned instructions_to_lower = (DIV_TO_MUL_RCP |
-                                     SUB_TO_ADD_NEG |
-                                     EXP_TO_EXP2 |
-                                     LOG_TO_LOG2 |
-                                     DFREXP_DLDEXP_TO_ARITH);
-   if (devinfo->ver < 7) {
-      instructions_to_lower |= BIT_COUNT_TO_MATH |
-                               EXTRACT_TO_SHIFTS |
-                               INSERT_TO_SHIFTS |
-                               REVERSE_TO_SHIFTS;
-   }
-
-   lower_instructions(shader->ir, instructions_to_lower);
-
-   /* Pre-gfx6 HW can only nest if-statements 16 deep.  Beyond this,
-    * if-statements need to be flattened.
- */ - if (devinfo->ver < 6) - lower_if_to_cond_assign(shader->Stage, shader->ir, 16); - - do_vec_index_to_cond_assign(shader->ir); - lower_vector_insert(shader->ir, true); - lower_offset_arrays(shader->ir); - lower_quadop_vector(shader->ir, false); - - validate_ir_tree(shader->ir); - - /* Now that we've finished altering the linked IR, reparent any live IR back - * to the permanent memory context, and free the temporary one (discarding any - * junk we optimized away). - */ - reparent_ir(shader->ir, shader->ir); - ralloc_free(mem_ctx); - - if (ctx->_Shader->Flags & GLSL_DUMP) { - fprintf(stderr, "\n"); - if (shader->ir) { - fprintf(stderr, "GLSL IR for linked %s program %d:\n", - _mesa_shader_stage_to_string(shader->Stage), - shader_prog->Name); - _mesa_print_ir(stderr, shader->ir, NULL); - } else { - fprintf(stderr, "No GLSL IR for linked %s program %d (shader may be " - "from cache)\n", _mesa_shader_stage_to_string(shader->Stage), - shader_prog->Name); - } - fprintf(stderr, "\n"); - } -} - -static void -unify_interfaces(struct shader_info **infos) -{ - struct shader_info *prev_info = NULL; - - for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) { - if (!infos[i]) - continue; - - if (prev_info) { - prev_info->outputs_written |= infos[i]->inputs_read & - ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); - infos[i]->inputs_read |= prev_info->outputs_written & - ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); - - prev_info->patch_outputs_written |= infos[i]->patch_inputs_read; - infos[i]->patch_inputs_read |= prev_info->patch_outputs_written; - } - prev_info = infos[i]; - } -} - -static void -update_xfb_info(struct gl_transform_feedback_info *xfb_info, - struct shader_info *info) -{ - if (!xfb_info) - return; - - for (unsigned i = 0; i < xfb_info->NumOutputs; i++) { - struct gl_transform_feedback_output *output = &xfb_info->Outputs[i]; - - /* The VUE header contains three scalar fields packed together: - * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w - * - gl_Layer is stored in VARYING_SLOT_PSIZ.y - * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z - */ - switch (output->OutputRegister) { - case VARYING_SLOT_LAYER: - assert(output->NumComponents == 1); - output->OutputRegister = VARYING_SLOT_PSIZ; - output->ComponentOffset = 1; - break; - case VARYING_SLOT_VIEWPORT: - assert(output->NumComponents == 1); - output->OutputRegister = VARYING_SLOT_PSIZ; - output->ComponentOffset = 2; - break; - case VARYING_SLOT_PSIZ: - assert(output->NumComponents == 1); - output->ComponentOffset = 3; - break; - } - - info->outputs_written |= 1ull << output->OutputRegister; - } -} - -extern "C" GLboolean -brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) -{ - struct brw_context *brw = brw_context(ctx); - const struct brw_compiler *compiler = brw->screen->compiler; - unsigned int stage; - struct shader_info *infos[MESA_SHADER_STAGES] = { 0, }; - - if (shProg->data->LinkStatus == LINKING_SKIPPED) - return GL_TRUE; - - for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) { - struct gl_linked_shader *shader = shProg->_LinkedShaders[stage]; - if (!shader) - continue; - - struct gl_program *prog = shader->Program; - prog->Parameters = _mesa_new_parameter_list(); - - if (!shader->spirv_data) - process_glsl_ir(brw, shProg, shader); - - _mesa_copy_linked_program_data(shProg, shader); - - prog->ShadowSamplers = shader->shadow_samplers; - - bool debug_enabled = - INTEL_DEBUG(intel_debug_flag_for_shader_stage(shader->Stage)); - - 
if (debug_enabled && shader->ir) {
-         fprintf(stderr, "GLSL IR for native %s shader %d:\n",
-                 _mesa_shader_stage_to_string(shader->Stage), shProg->Name);
-         _mesa_print_ir(stderr, shader->ir, NULL);
-         fprintf(stderr, "\n\n");
-      }
-
-      prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
-                                 compiler->scalar_stage[stage]);
-   }
-
-   /* TODO: Verify if it's feasible to split up the NIR linking work into a
-    * per-stage part (that fills out information we need for the passes) and
-    * an actual linking part, so that we could fold brw_nir_lower_resources
-    * back into brw_create_nir.
-    */
-
-   /* SPIR-V programs use a NIR linker */
-   if (shProg->data->spirv) {
-      static const gl_nir_linker_options opts = {
-         .fill_parameters = false,
-      };
-      if (!gl_nir_link_spirv(ctx, shProg, &opts))
-         return GL_FALSE;
-   }
-
-   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
-      struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
-      if (!shader)
-         continue;
-
-      struct gl_program *prog = shader->Program;
-
-      brw_nir_lower_resources(prog->nir, shProg, prog, &brw->screen->devinfo);
-
-      NIR_PASS_V(prog->nir, brw_nir_lower_gl_images, prog);
-   }
-
-   /* Determine first and last stage. */
-   unsigned first = MESA_SHADER_STAGES;
-   unsigned last = 0;
-   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
-      if (!shProg->_LinkedShaders[i])
-         continue;
-      if (first == MESA_SHADER_STAGES)
-         first = i;
-      last = i;
-   }
-
-   /* Linking the stages in the opposite order (from fragment to vertex)
-    * ensures that inter-shader outputs written to in an earlier stage
-    * are eliminated if they are (transitively) not used in a later
-    * stage.
-    *
-    * TODO: Look into Shadow of Mordor regressions on HSW and enable this for
-    * all platforms.  See: https://bugs.freedesktop.org/show_bug.cgi?id=103537
-    */
-   if (first != last && brw->screen->devinfo.ver >= 8) {
-      int next = last;
-      for (int i = next - 1; i >= 0; i--) {
-         if (shProg->_LinkedShaders[i] == NULL)
-            continue;
-
-         brw_nir_link_shaders(compiler,
-                              shProg->_LinkedShaders[i]->Program->nir,
-                              shProg->_LinkedShaders[next]->Program->nir);
-         next = i;
-      }
-   }
-
-   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
-      struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
-      if (!shader)
-         continue;
-
-      struct gl_program *prog = shader->Program;
-
-      _mesa_update_shader_textures_used(shProg, prog);
-
-      brw_shader_gather_info(prog->nir, prog);
-
-      NIR_PASS_V(prog->nir, gl_nir_lower_atomics, shProg, false);
-      NIR_PASS_V(prog->nir, nir_lower_atomics_to_ssbo);
-
-      nir_sweep(prog->nir);
-
-      infos[stage] = &prog->nir->info;
-
-      update_xfb_info(prog->sh.LinkedTransformFeedback, infos[stage]);
-
-      /* Make a pass over the IR to add state references for any built-in
-       * uniforms that are used.  This has to be done now (during linking).
-       * Code generation doesn't happen until the first time this shader is
-       * used for rendering.  Waiting until then to generate the parameters is
-       * too late.  At that point, the values for the built-in uniforms won't
-       * get sent to the shader.
-       */
-      nir_foreach_uniform_variable(var, prog->nir) {
-         const nir_state_slot *const slots = var->state_slots;
-         for (unsigned int i = 0; i < var->num_state_slots; i++) {
-            assert(slots != NULL);
-            _mesa_add_state_reference(prog->Parameters, slots[i].tokens);
-         }
-      }
-   }
-
-   /* The linker tries to dead code eliminate unused varying components,
-    * and make sure interfaces match.  But it isn't able to do so in all
-    * cases.  So, explicitly make the interfaces match by OR'ing together
-    * the inputs_read/outputs_written bitfields of adjacent stages.
-    */
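-   /* Editor's illustration (hypothetical VS->FS pair, not from the original
-    * sources): unify_interfaces() above simply ORs the adjacent stages'
-    * bitfields, i.e. effectively
-    *
-    *    vs_info->outputs_written |= fs_info->inputs_read;
-    *    fs_info->inputs_read     |= vs_info->outputs_written;
-    *
-    * so a varying mentioned by only one side ends up marked present on both
-    * (tess-level varyings excepted, as they are masked out there).
-    */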
-   if (!shProg->SeparateShader)
-      unify_interfaces(infos);
-
-   if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
-      for (unsigned i = 0; i < shProg->NumShaders; i++) {
-         const struct gl_shader *sh = shProg->Shaders[i];
-         if (!sh)
-            continue;
-
-         fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
-                 _mesa_shader_stage_to_string(sh->Stage),
-                 i, shProg->Name);
-         fprintf(stderr, "%s", sh->Source);
-         fprintf(stderr, "\n");
-      }
-   }
-
-   if (brw->precompile && !brw_shader_precompile(ctx, shProg))
-      return GL_FALSE;
-
-   /* SPIR-V programs build their resource list from the linked NIR shaders. */
-   if (!shProg->data->spirv)
-      build_program_resource_list(ctx, shProg, false);
-   else
-      nir_build_program_resource_list(ctx, shProg, true);
-
-   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
-      struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
-      if (!shader)
-         continue;
-
-      /* The GLSL IR won't be needed anymore. */
-      ralloc_free(shader->ir);
-      shader->ir = NULL;
-   }
-
-   return GL_TRUE;
-}
diff --git a/src/mesa/drivers/dri/i965/brw_meta_util.c b/src/mesa/drivers/dri/i965/brw_meta_util.c
deleted file mode 100644
index eca367d..0000000
--- a/src/mesa/drivers/dri/i965/brw_meta_util.c
+++ /dev/null
@@ -1,422 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_fbo.h"
-#include "brw_meta_util.h"
-#include "brw_state.h"
-#include "main/blend.h"
-#include "main/fbobject.h"
-#include "util/format_srgb.h"
-
-/**
- * Helper function for handling mirror image blits.
- *
- * If coord0 > coord1, swap them and invert the "mirror" boolean.
- */ -static inline void -fixup_mirroring(bool *mirror, float *coord0, float *coord1) -{ - if (*coord0 > *coord1) { - *mirror = !*mirror; - float tmp = *coord0; - *coord0 = *coord1; - *coord1 = tmp; - } -} - -/** - * Compute the number of pixels to clip for each side of a rect - * - * \param x0 The rect's left coordinate - * \param y0 The rect's bottom coordinate - * \param x1 The rect's right coordinate - * \param y1 The rect's top coordinate - * \param min_x The clipping region's left coordinate - * \param min_y The clipping region's bottom coordinate - * \param max_x The clipping region's right coordinate - * \param max_y The clipping region's top coordinate - * \param clipped_x0 The number of pixels to clip from the left side - * \param clipped_y0 The number of pixels to clip from the bottom side - * \param clipped_x1 The number of pixels to clip from the right side - * \param clipped_y1 The number of pixels to clip from the top side - * - * \return false if we clip everything away, true otherwise - */ -static inline bool -compute_pixels_clipped(float x0, float y0, float x1, float y1, - float min_x, float min_y, float max_x, float max_y, - float *clipped_x0, float *clipped_y0, float *clipped_x1, float *clipped_y1) -{ - /* If we are going to clip everything away, stop. */ - if (!(min_x <= max_x && - min_y <= max_y && - x0 <= max_x && - y0 <= max_y && - min_x <= x1 && - min_y <= y1 && - x0 <= x1 && - y0 <= y1)) { - return false; - } - - if (x0 < min_x) - *clipped_x0 = min_x - x0; - else - *clipped_x0 = 0; - if (max_x < x1) - *clipped_x1 = x1 - max_x; - else - *clipped_x1 = 0; - - if (y0 < min_y) - *clipped_y0 = min_y - y0; - else - *clipped_y0 = 0; - if (max_y < y1) - *clipped_y1 = y1 - max_y; - else - *clipped_y1 = 0; - - return true; -} - -/** - * Clips a coordinate (left, right, top or bottom) for the src or dst rect - * (whichever requires the largest clip) and adjusts the coordinate - * for the other rect accordingly. - * - * \param mirror true if mirroring is required - * \param src the source rect coordinate (for example srcX0) - * \param dst0 the dst rect coordinate (for example dstX0) - * \param dst1 the opposite dst rect coordinate (for example dstX1) - * \param clipped_src0 number of pixels to clip from the src coordinate - * \param clipped_dst0 number of pixels to clip from the dst coordinate - * \param clipped_dst1 number of pixels to clip from the opposite dst coordinate - * \param scale the src vs dst scale involved for that coordinate - * \param isLeftOrBottom true if we are clipping the left or bottom sides - * of the rect. - */ -static inline void -clip_coordinates(bool mirror, - float *src, float *dst0, float *dst1, - float clipped_src0, - float clipped_dst0, - float clipped_dst1, - float scale, - bool isLeftOrBottom) -{ - /* When clipping we need to add or subtract pixels from the original - * coordinates depending on whether we are acting on the left/bottom - * or right/top sides of the rect respectively. We assume we have to - * add them in the code below, and multiply by -1 when we should - * subtract. - */ - int mult = isLeftOrBottom ? 
1 : -1; - - if (!mirror) { - if (clipped_src0 >= clipped_dst0 * scale) { - *src += clipped_src0 * mult; - *dst0 += clipped_src0 / scale * mult; - } else { - *dst0 += clipped_dst0 * mult; - *src += clipped_dst0 * scale * mult; - } - } else { - if (clipped_src0 >= clipped_dst1 * scale) { - *src += clipped_src0 * mult; - *dst1 -= clipped_src0 / scale * mult; - } else { - *dst1 -= clipped_dst1 * mult; - *src += clipped_dst1 * scale * mult; - } - } -} - -bool -brw_meta_mirror_clip_and_scissor(const struct gl_context *ctx, - const struct gl_framebuffer *read_fb, - const struct gl_framebuffer *draw_fb, - GLfloat *srcX0, GLfloat *srcY0, - GLfloat *srcX1, GLfloat *srcY1, - GLfloat *dstX0, GLfloat *dstY0, - GLfloat *dstX1, GLfloat *dstY1, - bool *mirror_x, bool *mirror_y) -{ - *mirror_x = false; - *mirror_y = false; - - /* Detect if the blit needs to be mirrored */ - fixup_mirroring(mirror_x, srcX0, srcX1); - fixup_mirroring(mirror_x, dstX0, dstX1); - fixup_mirroring(mirror_y, srcY0, srcY1); - fixup_mirroring(mirror_y, dstY0, dstY1); - - /* Compute number of pixels to clip for each side of both rects. Return - * early if we are going to clip everything away. - */ - float clip_src_x0; - float clip_src_x1; - float clip_src_y0; - float clip_src_y1; - float clip_dst_x0; - float clip_dst_x1; - float clip_dst_y0; - float clip_dst_y1; - - if (!compute_pixels_clipped(*srcX0, *srcY0, *srcX1, *srcY1, - 0, 0, read_fb->Width, read_fb->Height, - &clip_src_x0, &clip_src_y0, &clip_src_x1, &clip_src_y1)) - return true; - - if (!compute_pixels_clipped(*dstX0, *dstY0, *dstX1, *dstY1, - draw_fb->_Xmin, draw_fb->_Ymin, draw_fb->_Xmax, draw_fb->_Ymax, - &clip_dst_x0, &clip_dst_y0, &clip_dst_x1, &clip_dst_y1)) - return true; - - /* When clipping any of the two rects we need to adjust the coordinates in - * the other rect considering the scaling factor involved. To obtain the best - * precision we want to make sure that we only clip once per side to avoid - * accumulating errors due to the scaling adjustment. - * - * For example, if srcX0 and dstX0 need both to be clipped we want to avoid - * the situation where we clip srcX0 first, then adjust dstX0 accordingly - * but then we realize that the resulting dstX0 still needs to be clipped, - * so we clip dstX0 and adjust srcX0 again. Because we are applying scaling - * factors to adjust the coordinates in each clipping pass we lose some - * precision and that can affect the results of the blorp blit operation - * slightly. What we want to do here is detect the rect that we should - * clip first for each side so that when we adjust the other rect we ensure - * the resulting coordinate does not need to be clipped again. - * - * The code below implements this by comparing the number of pixels that - * we need to clip for each side of both rects considering the scales - * involved. For example, clip_src_x0 represents the number of pixels to be - * clipped for the src rect's left side, so if clip_src_x0 = 5, - * clip_dst_x0 = 4 and scaleX = 2 it means that we are clipping more from - * the dst rect so we should clip dstX0 only and adjust srcX0. This is - * because clipping 4 pixels in the dst is equivalent to clipping - * 4 * 2 = 8 > 5 in the src. 
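A minimal standalone C sketch of the ordering rule above, using a hypothetical clip_source_first() helper rather than anything from this driver, reproduces the worked numbers from the comment:

    #include <stdbool.h>
    #include <stdio.h>

    /* Decide which rect to clip for one side: the source needs the larger
     * clip once both amounts are expressed in source pixels
     * (clipped_dst * scale converts destination pixels to source pixels).
     */
    static bool
    clip_source_first(float clipped_src, float clipped_dst, float scale)
    {
       return clipped_src >= clipped_dst * scale;
    }

    int
    main(void)
    {
       /* clip_src_x0 = 5, clip_dst_x0 = 4, scaleX = 2: clipping 4 dst
        * pixels equals 8 src pixels > 5, so clip the dst rect and only
        * adjust the src.
        */
       printf("clip %s first\n",
              clip_source_first(5.0f, 4.0f, 2.0f) ? "src" : "dst");
       return 0;
    }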
- */ - - if (*srcX0 == *srcX1 || *srcY0 == *srcY1 - || *dstX0 == *dstX1 || *dstY0 == *dstY1) - return true; - - float scaleX = (float) (*srcX1 - *srcX0) / (*dstX1 - *dstX0); - float scaleY = (float) (*srcY1 - *srcY0) / (*dstY1 - *dstY0); - - /* Clip left side */ - clip_coordinates(*mirror_x, - srcX0, dstX0, dstX1, - clip_src_x0, clip_dst_x0, clip_dst_x1, - scaleX, true); - - /* Clip right side */ - clip_coordinates(*mirror_x, - srcX1, dstX1, dstX0, - clip_src_x1, clip_dst_x1, clip_dst_x0, - scaleX, false); - - /* Clip bottom side */ - clip_coordinates(*mirror_y, - srcY0, dstY0, dstY1, - clip_src_y0, clip_dst_y0, clip_dst_y1, - scaleY, true); - - /* Clip top side */ - clip_coordinates(*mirror_y, - srcY1, dstY1, dstY0, - clip_src_y1, clip_dst_y1, clip_dst_y0, - scaleY, false); - - /* Account for the fact that in the system framebuffer, the origin is at - * the lower left. - */ - if (read_fb->FlipY) { - GLint tmp = read_fb->Height - *srcY0; - *srcY0 = read_fb->Height - *srcY1; - *srcY1 = tmp; - *mirror_y = !*mirror_y; - } - if (draw_fb->FlipY) { - GLint tmp = draw_fb->Height - *dstY0; - *dstY0 = draw_fb->Height - *dstY1; - *dstY1 = tmp; - *mirror_y = !*mirror_y; - } - - /* Check for invalid bounds - * Can't blit for 0-dimensions - */ - return *srcX0 == *srcX1 || *srcY0 == *srcY1 - || *dstX0 == *dstX1 || *dstY0 == *dstY1; -} - -/** - * Determine if fast color clear supports the given clear color. - * - * Fast color clear can only clear to color values of 1.0 or 0.0. At the - * moment we only support floating point, unorm, and snorm buffers. - */ -bool -brw_is_color_fast_clear_compatible(struct brw_context *brw, - const struct brw_mipmap_tree *mt, - const union gl_color_union *color) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct gl_context *ctx = &brw->ctx; - - /* If we're mapping the render format to a different format than the - * format we use for texturing then it is a bit questionable whether it - * should be possible to use a fast clear. Although we only actually - * render using a renderable format, without the override workaround it - * wouldn't be possible to have a non-renderable surface in a fast clear - * state so the hardware probably legitimately doesn't need to support - * this case. At least on Gfx9 this really does seem to cause problems. - */ - if (devinfo->ver >= 9 && - brw_isl_format_for_mesa_format(mt->format) != - brw->mesa_to_isl_render_format[mt->format]) - return false; - - const mesa_format format = _mesa_get_render_format(ctx, mt->format); - if (_mesa_is_format_integer_color(format)) { - if (devinfo->ver >= 8) { - perf_debug("Integer fast clear not enabled for (%s)", - _mesa_get_format_name(format)); - } - return false; - } - - for (int i = 0; i < 4; i++) { - if (!_mesa_format_has_color_component(format, i)) { - continue; - } - - if (devinfo->ver < 9 && - color->f[i] != 0.0f && color->f[i] != 1.0f) { - return false; - } - } - return true; -} - -/** - * Convert the given color to a bitfield suitable for ORing into DWORD 7 of - * SURFACE_STATE (DWORD 12-15 on SKL+). 
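The per-channel rule that brw_is_color_fast_clear_compatible() applies on pre-Gfx9 hardware condenses to a small predicate. A sketch, with has_component as a stand-in for _mesa_format_has_color_component():

    #include <stdbool.h>
    #include <stdio.h>

    /* Pre-Gfx9: every channel present in the format must be exactly
     * 0.0 or 1.0 for a fast clear to be possible.
     */
    static bool
    fast_clear_color_ok_pre_gfx9(const float color[4],
                                 bool (*has_component)(int chan))
    {
       for (int i = 0; i < 4; i++) {
          if (!has_component(i))
             continue; /* missing channels are overridden anyway */
          if (color[i] != 0.0f && color[i] != 1.0f)
             return false;
       }
       return true;
    }

    static bool rgb_only(int chan) { return chan < 3; } /* an RGBX-like format */

    int
    main(void)
    {
       const float color[4] = { 1.0f, 0.0f, 0.0f, 0.25f };
       /* Alpha is absent from the format, so this color still qualifies. */
       printf("%s\n", fast_clear_color_ok_pre_gfx9(color, rgb_only)
                         ? "fast clear ok" : "no fast clear");
       return 0;
    }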
- */ -union isl_color_value -brw_meta_convert_fast_clear_color(const struct brw_context *brw, - const struct brw_mipmap_tree *mt, - const union gl_color_union *color) -{ - union isl_color_value override_color = { - .u32 = { - color->ui[0], - color->ui[1], - color->ui[2], - color->ui[3], - }, - }; - - /* The sampler doesn't look at the format of the surface when the fast - * clear color is used so we need to implement luminance, intensity and - * missing components manually. - */ - switch (_mesa_get_format_base_format(mt->format)) { - case GL_INTENSITY: - override_color.u32[3] = override_color.u32[0]; - FALLTHROUGH; - case GL_LUMINANCE: - case GL_LUMINANCE_ALPHA: - override_color.u32[1] = override_color.u32[0]; - override_color.u32[2] = override_color.u32[0]; - break; - default: - for (int i = 0; i < 3; i++) { - if (!_mesa_format_has_color_component(mt->format, i)) - override_color.u32[i] = 0; - } - break; - } - - switch (_mesa_get_format_datatype(mt->format)) { - case GL_UNSIGNED_NORMALIZED: - for (int i = 0; i < 4; i++) - override_color.f32[i] = SATURATE(override_color.f32[i]); - break; - - case GL_SIGNED_NORMALIZED: - for (int i = 0; i < 4; i++) - override_color.f32[i] = CLAMP(override_color.f32[i], -1.0f, 1.0f); - break; - - case GL_UNSIGNED_INT: - for (int i = 0; i < 4; i++) { - unsigned bits = _mesa_get_format_bits(mt->format, GL_RED_BITS + i); - if (bits < 32) { - uint32_t max = (1u << bits) - 1; - override_color.u32[i] = MIN2(override_color.u32[i], max); - } - } - break; - - case GL_INT: - for (int i = 0; i < 4; i++) { - unsigned bits = _mesa_get_format_bits(mt->format, GL_RED_BITS + i); - if (bits < 32) { - int32_t max = (1 << (bits - 1)) - 1; - int32_t min = -(1 << (bits - 1)); - override_color.i32[i] = CLAMP(override_color.i32[i], min, max); - } - } - break; - - case GL_FLOAT: - if (!_mesa_is_format_signed(mt->format)) { - for (int i = 0; i < 4; i++) - override_color.f32[i] = MAX2(override_color.f32[i], 0.0f); - } - break; - } - - if (!_mesa_format_has_color_component(mt->format, 3)) { - if (_mesa_is_format_integer_color(mt->format)) - override_color.u32[3] = 1; - else - override_color.f32[3] = 1.0f; - } - - /* Handle linear to SRGB conversion */ - if (brw->ctx.Color.sRGBEnabled && - _mesa_get_srgb_format_linear(mt->format) != mt->format) { - for (int i = 0; i < 3; i++) { - override_color.f32[i] = - util_format_linear_to_srgb_float(override_color.f32[i]); - } - } - - return override_color; -} diff --git a/src/mesa/drivers/dri/i965/brw_meta_util.h b/src/mesa/drivers/dri/i965/brw_meta_util.h deleted file mode 100644 index c469490..0000000 --- a/src/mesa/drivers/dri/i965/brw_meta_util.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef BRW_META_UTIL_H -#define BRW_META_UTIL_H - -#include <stdbool.h> -#include "main/mtypes.h" -#include "brw_mipmap_tree.h" - -#ifdef __cplusplus -extern "C" { -#endif - -bool -brw_meta_mirror_clip_and_scissor(const struct gl_context *ctx, - const struct gl_framebuffer *read_fb, - const struct gl_framebuffer *draw_fb, - GLfloat *srcX0, GLfloat *srcY0, - GLfloat *srcX1, GLfloat *srcY1, - GLfloat *dstX0, GLfloat *dstY0, - GLfloat *dstX1, GLfloat *dstY1, - bool *mirror_x, bool *mirror_y); - -union isl_color_value -brw_meta_convert_fast_clear_color(const struct brw_context *brw, - const struct brw_mipmap_tree *mt, - const union gl_color_union *color); - -bool -brw_is_color_fast_clear_compatible(struct brw_context *brw, - const struct brw_mipmap_tree *mt, - const union gl_color_union *color); - -#ifdef __cplusplus -} -#endif - -#endif /* BRW_META_UTIL_H */ diff --git a/src/mesa/drivers/dri/i965/brw_mipmap_tree.c b/src/mesa/drivers/dri/i965/brw_mipmap_tree.c deleted file mode 100644 index 327340a..0000000 --- a/src/mesa/drivers/dri/i965/brw_mipmap_tree.c +++ /dev/null @@ -1,3308 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */ - -#include <GL/gl.h> -#include <GL/internal/dri_interface.h> -#include "drm-uapi/drm_fourcc.h" - -#include "brw_batch.h" -#include "brw_image.h" -#include "brw_mipmap_tree.h" -#include "brw_tex.h" -#include "brw_blit.h" -#include "brw_fbo.h" - -#include "brw_blorp.h" -#include "brw_context.h" -#include "brw_state.h" - -#include "main/enums.h" -#include "main/fbobject.h" -#include "main/formats.h" -#include "main/glformats.h" -#include "main/texcompress_etc.h" -#include "main/teximage.h" -#include "main/streaming-load-memcpy.h" - -#include "util/format_srgb.h" -#include "util/u_memory.h" - -#include "x86/common_x86_asm.h" - -#define FILE_DEBUG_FLAG DEBUG_MIPTREE - -static void *brw_miptree_map_raw(struct brw_context *brw, - struct brw_mipmap_tree *mt, - GLbitfield mode); - -static void brw_miptree_unmap_raw(struct brw_mipmap_tree *mt); - -/** - * Return true if the format that will be used to access the miptree is - * CCS_E-compatible with the miptree's linear/non-sRGB format. - * - * Why use the linear format? Well, although the miptree may be specified with - * an sRGB format, the usage of that color space/format can be toggled. Since - * our HW tends to support more linear formats than sRGB ones, we use this - * format variant to check for CCS_E compatibility. - */ -static bool -format_ccs_e_compat_with_miptree(const struct intel_device_info *devinfo, - const struct brw_mipmap_tree *mt, - enum isl_format access_format) -{ - assert(mt->aux_usage == ISL_AUX_USAGE_CCS_E); - - mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format); - enum isl_format isl_format = brw_isl_format_for_mesa_format(linear_format); - return isl_formats_are_ccs_e_compatible(devinfo, isl_format, access_format); -} - -/* Determine if CCS_E is supported for a given platform and mesa format. */ -static bool -format_supports_ccs_e(const struct brw_context *brw, mesa_format format) -{ - /* For now compression is only enabled for integer formats even though - * there exist supported floating point formats also. This is a heuristic - * decision based on current public benchmarks. In none of the cases did - * these formats provide any improvement, and a few cases were seen to - * regress. Hence these are left to be enabled in the future when they are - * known to improve things. - */ - if (_mesa_get_format_datatype(format) == GL_FLOAT) - return false; - - /* Many window system buffers are sRGB even if they are never rendered as - * sRGB. For those, we want CCS_E for when sRGBEncode is false. When the - * surface is used as sRGB, we fall back to CCS_D. - */ - mesa_format linear_format = _mesa_get_srgb_format_linear(format); - enum isl_format isl_format = brw_isl_format_for_mesa_format(linear_format); - return isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format); -} - -/** - * Determine the depth format corresponding to a depth+stencil format, - * for separate stencil. - */ -mesa_format -brw_depth_format_for_depthstencil_format(mesa_format format) { - switch (format) { - case MESA_FORMAT_Z24_UNORM_S8_UINT: - return MESA_FORMAT_Z24_UNORM_X8_UINT; - case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: - return MESA_FORMAT_Z_FLOAT32; - default: - return format; - } -} - -static bool -create_mapping_table(GLenum target, unsigned first_level, unsigned last_level, - unsigned depth0, struct brw_mipmap_level *table) -{ - for (unsigned level = first_level; level <= last_level; level++) { - const unsigned d = - target == GL_TEXTURE_3D ?
minify(depth0, level) : depth0; - - table[level].slice = calloc(d, sizeof(*table[0].slice)); - if (!table[level].slice) - goto unwind; - } - - return true; - -unwind: - for (unsigned level = first_level; level <= last_level; level++) - free(table[level].slice); - - return false; -} - -static bool -needs_separate_stencil(const struct brw_context *brw, - struct brw_mipmap_tree *mt, - mesa_format format) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (_mesa_get_format_base_format(format) != GL_DEPTH_STENCIL) - return false; - - if (devinfo->must_use_separate_stencil) - return true; - - return brw->has_separate_stencil && brw->has_hiz; -} - -/** - * Choose the aux usage for this miptree. This function must be called fairly - * late in the miptree create process after we have a tiling. - */ -static void -brw_miptree_choose_aux_usage(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - assert(mt->aux_usage == ISL_AUX_USAGE_NONE); - - if (_mesa_is_format_color_format(mt->format)) { - if (mt->surf.samples > 1) { - mt->aux_usage = ISL_AUX_USAGE_MCS; - } else if (!INTEL_DEBUG(DEBUG_NO_RBC) && - format_supports_ccs_e(brw, mt->format)) { - mt->aux_usage = ISL_AUX_USAGE_CCS_E; - } else if (brw->mesa_format_supports_render[mt->format]) { - mt->aux_usage = ISL_AUX_USAGE_CCS_D; - } - } else if (isl_surf_usage_is_depth(mt->surf.usage) && brw->has_hiz) { - mt->aux_usage = ISL_AUX_USAGE_HIZ; - } - - /* We can do fast-clear on all auxiliary surface types that are - * allocated through the normal texture creation paths. - */ - if (mt->aux_usage != ISL_AUX_USAGE_NONE) - mt->supports_fast_clear = true; -} - - -/** - * Choose an appropriate uncompressed format for a requested - * compressed format, if unsupported. - */ -mesa_format -brw_lower_compressed_format(struct brw_context *brw, mesa_format format) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* No need to lower ETC formats on these platforms, - * they are supported natively. - */ - if (devinfo->ver >= 8 || devinfo->platform == INTEL_PLATFORM_BYT) - return format; - - switch (format) { - case MESA_FORMAT_ETC1_RGB8: - return MESA_FORMAT_R8G8B8X8_UNORM; - case MESA_FORMAT_ETC2_RGB8: - return MESA_FORMAT_R8G8B8X8_UNORM; - case MESA_FORMAT_ETC2_SRGB8: - case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC: - case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1: - return MESA_FORMAT_B8G8R8A8_SRGB; - case MESA_FORMAT_ETC2_RGBA8_EAC: - case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1: - return MESA_FORMAT_R8G8B8A8_UNORM; - case MESA_FORMAT_ETC2_R11_EAC: - return MESA_FORMAT_R_UNORM16; - case MESA_FORMAT_ETC2_SIGNED_R11_EAC: - return MESA_FORMAT_R_SNORM16; - case MESA_FORMAT_ETC2_RG11_EAC: - return MESA_FORMAT_RG_UNORM16; - case MESA_FORMAT_ETC2_SIGNED_RG11_EAC: - return MESA_FORMAT_RG_SNORM16; - default: - /* Non ETC1 / ETC2 format */ - return format; - } -} - -unsigned -brw_get_num_logical_layers(const struct brw_mipmap_tree *mt, unsigned level) -{ - if (mt->surf.dim == ISL_SURF_DIM_3D) - return minify(mt->surf.logical_level0_px.depth, level); - else - return mt->surf.logical_level0_px.array_len; -} - -UNUSED static unsigned -get_num_phys_layers(const struct isl_surf *surf, unsigned level) -{ - /* In case of physical dimensions one needs to consider also the layout. - * See isl_calc_phys_level0_extent_sa(). 
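brw_get_num_logical_layers() leans on minify(), which is defined elsewhere in the tree; a sketch of its usual Mesa definition, halving a dimension per level and clamping at 1:

    #include <assert.h>
    #include <stdint.h>

    /* Sketch of minify() as commonly defined in Mesa. */
    static inline uint32_t
    minify(uint32_t value, uint32_t levels)
    {
       return (value >> levels) ? (value >> levels) : 1;
    }

    int
    main(void)
    {
       /* A 3D texture of depth 16 has per-level depths 16, 8, 4, 2, 1, 1, ... */
       assert(minify(16, 0) == 16);
       assert(minify(16, 3) == 2);
       assert(minify(16, 5) == 1);
       return 0;
    }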
- */ - if (surf->dim != ISL_SURF_DIM_3D) - return surf->phys_level0_sa.array_len; - - if (surf->dim_layout == ISL_DIM_LAYOUT_GFX4_2D) - return minify(surf->phys_level0_sa.array_len, level); - - return minify(surf->phys_level0_sa.depth, level); -} - -/** \brief Assert that the level and layer are valid for the miptree. */ -void -brw_miptree_check_level_layer(const struct brw_mipmap_tree *mt, - uint32_t level, - uint32_t layer) -{ - (void) mt; - (void) level; - (void) layer; - - assert(level >= mt->first_level); - assert(level <= mt->last_level); - assert(layer < get_num_phys_layers(&mt->surf, level)); -} - -static enum isl_aux_state ** -create_aux_state_map(struct brw_mipmap_tree *mt, - enum isl_aux_state initial) -{ - const uint32_t levels = mt->last_level + 1; - - uint32_t total_slices = 0; - for (uint32_t level = 0; level < levels; level++) - total_slices += brw_get_num_logical_layers(mt, level); - - const size_t per_level_array_size = levels * sizeof(enum isl_aux_state *); - - /* We're going to allocate a single chunk of data for both the per-level - * reference array and the arrays of aux_state. This makes cleanup - * significantly easier. - */ - const size_t total_size = per_level_array_size + - total_slices * sizeof(enum isl_aux_state); - void *data = malloc(total_size); - if (data == NULL) - return NULL; - - enum isl_aux_state **per_level_arr = data; - enum isl_aux_state *s = data + per_level_array_size; - for (uint32_t level = 0; level < levels; level++) { - per_level_arr[level] = s; - const unsigned level_layers = brw_get_num_logical_layers(mt, level); - for (uint32_t a = 0; a < level_layers; a++) - *(s++) = initial; - } - assert((void *)s == data + total_size); - - return per_level_arr; -} - -static void -free_aux_state_map(enum isl_aux_state **state) -{ - free(state); -} - -static bool -need_to_retile_as_linear(struct brw_context *brw, unsigned blt_pitch, - enum isl_tiling tiling, unsigned samples) -{ - if (samples > 1) - return false; - - if (tiling == ISL_TILING_LINEAR) - return false; - - if (blt_pitch >= 32768) { - perf_debug("blt pitch %u too large to blit, falling back to untiled", - blt_pitch); - return true; - } - - return false; -} - -static bool -need_to_retile_as_x(const struct brw_context *brw, uint64_t size, - enum isl_tiling tiling) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* If the BO is too large to fit in the aperture, we need to use the - * BLT engine to support it. Prior to Sandybridge, the BLT paths can't - * handle Y-tiling, so we need to fall back to X. 
- */ - if (devinfo->ver < 6 && size >= brw->max_gtt_map_object_size && - tiling == ISL_TILING_Y0) - return true; - - return false; -} - -static struct brw_mipmap_tree * -make_surface(struct brw_context *brw, GLenum target, mesa_format format, - unsigned first_level, unsigned last_level, - unsigned width0, unsigned height0, unsigned depth0, - unsigned num_samples, isl_tiling_flags_t tiling_flags, - isl_surf_usage_flags_t isl_usage_flags, uint32_t alloc_flags, - unsigned row_pitch_B, struct brw_bo *bo) -{ - struct brw_mipmap_tree *mt = calloc(sizeof(*mt), 1); - if (!mt) - return NULL; - - if (!create_mapping_table(target, first_level, last_level, depth0, - mt->level)) { - free(mt); - return NULL; - } - - mt->refcount = 1; - - if (target == GL_TEXTURE_CUBE_MAP || - target == GL_TEXTURE_CUBE_MAP_ARRAY) - isl_usage_flags |= ISL_SURF_USAGE_CUBE_BIT; - - DBG("%s: %s %s %ux %u:%u:%u %d..%d <-- %p\n", - __func__, - _mesa_enum_to_string(target), - _mesa_get_format_name(format), - num_samples, width0, height0, depth0, - first_level, last_level, mt); - - struct isl_surf_init_info init_info = { - .dim = get_isl_surf_dim(target), - .format = translate_tex_format(brw, format, false), - .width = width0, - .height = height0, - .depth = target == GL_TEXTURE_3D ? depth0 : 1, - .levels = last_level - first_level + 1, - .array_len = target == GL_TEXTURE_3D ? 1 : depth0, - .samples = num_samples, - .row_pitch_B = row_pitch_B, - .usage = isl_usage_flags, - .tiling_flags = tiling_flags, - }; - - if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info)) - goto fail; - - /* Depth surfaces are always Y-tiled and stencil is always W-tiled, although - * on gfx7 platforms we also need to create Y-tiled copies of stencil for - * texturing since the hardware can't sample from W-tiled surfaces. For - * everything else, check for corner cases needing special treatment. - */ - bool is_depth_stencil = - mt->surf.usage & (ISL_SURF_USAGE_STENCIL_BIT | ISL_SURF_USAGE_DEPTH_BIT); - if (!is_depth_stencil) { - if (need_to_retile_as_linear(brw, brw_miptree_blt_pitch(mt), - mt->surf.tiling, mt->surf.samples)) { - init_info.tiling_flags = 1u << ISL_TILING_LINEAR; - if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info)) - goto fail; - } else if (need_to_retile_as_x(brw, mt->surf.size_B, mt->surf.tiling)) { - init_info.tiling_flags = 1u << ISL_TILING_X; - if (!isl_surf_init_s(&brw->isl_dev, &mt->surf, &init_info)) - goto fail; - } - } - - /* In case of linear the buffer gets padded by fixed 64 bytes and therefore - * the size may not be multiple of row_pitch. - * See isl_apply_surface_padding(). - */ - if (mt->surf.tiling != ISL_TILING_LINEAR) - assert(mt->surf.size_B % mt->surf.row_pitch_B == 0); - - if (!bo) { - mt->bo = brw_bo_alloc_tiled(brw->bufmgr, "isl-miptree", - mt->surf.size_B, - BRW_MEMZONE_OTHER, - isl_tiling_to_i915_tiling( - mt->surf.tiling), - mt->surf.row_pitch_B, alloc_flags); - if (!mt->bo) - goto fail; - } else { - mt->bo = bo; - } - - mt->first_level = first_level; - mt->last_level = last_level; - mt->target = target; - mt->format = format; - mt->aux_state = NULL; - mt->cpp = isl_format_get_layout(mt->surf.format)->bpb / 8; - mt->compressed = _mesa_is_format_compressed(format); - mt->drm_modifier = DRM_FORMAT_MOD_INVALID; - - return mt; - -fail: - brw_miptree_release(&mt); - return NULL; -} - -/* Return the usual surface usage flags for the given format. 
*/ -static isl_surf_usage_flags_t -mt_surf_usage(mesa_format format) -{ - switch (_mesa_get_format_base_format(format)) { - case GL_DEPTH_COMPONENT: - return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT; - case GL_DEPTH_STENCIL: - return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT | - ISL_SURF_USAGE_TEXTURE_BIT; - case GL_STENCIL_INDEX: - return ISL_SURF_USAGE_STENCIL_BIT | ISL_SURF_USAGE_TEXTURE_BIT; - default: - return ISL_SURF_USAGE_RENDER_TARGET_BIT | ISL_SURF_USAGE_TEXTURE_BIT; - } -} - -static struct brw_mipmap_tree * -miptree_create(struct brw_context *brw, - GLenum target, - mesa_format format, - GLuint first_level, - GLuint last_level, - GLuint width0, - GLuint height0, - GLuint depth0, - GLuint num_samples, - enum brw_miptree_create_flags flags) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const uint32_t alloc_flags = - (flags & MIPTREE_CREATE_BUSY || num_samples > 1) ? BO_ALLOC_BUSY : 0; - isl_tiling_flags_t tiling_flags = ISL_TILING_ANY_MASK; - - /* TODO: This used to be because there wasn't BLORP to handle Y-tiling. */ - if (devinfo->ver < 6 && _mesa_is_format_color_format(format)) - tiling_flags &= ~ISL_TILING_Y0_BIT; - - mesa_format mt_fmt = format; - if (!_mesa_is_format_color_format(format) && devinfo->ver >= 6) { - /* Fix up the Z miptree format for how we're splitting out separate - * stencil. Gfx7 expects there to be no stencil bits in its depth buffer. - */ - mt_fmt = brw_depth_format_for_depthstencil_format(format); - } - - struct brw_mipmap_tree *mt = - make_surface(brw, target, mt_fmt, first_level, last_level, - width0, height0, depth0, num_samples, - tiling_flags, mt_surf_usage(mt_fmt), - alloc_flags, 0, NULL); - - if (mt == NULL) - return NULL; - - if (brw_miptree_needs_fake_etc(brw, mt)) { - mesa_format decomp_format = brw_lower_compressed_format(brw, format); - mt->shadow_mt = make_surface(brw, target, decomp_format, first_level, - last_level, width0, height0, depth0, - num_samples, tiling_flags, - mt_surf_usage(decomp_format), - alloc_flags, 0, NULL); - - if (mt->shadow_mt == NULL) { - brw_miptree_release(&mt); - return NULL; - } - } - - if (needs_separate_stencil(brw, mt, format)) { - mt->stencil_mt = - make_surface(brw, target, MESA_FORMAT_S_UINT8, first_level, last_level, - width0, height0, depth0, num_samples, - ISL_TILING_W_BIT, mt_surf_usage(MESA_FORMAT_S_UINT8), - alloc_flags, 0, NULL); - if (mt->stencil_mt == NULL) { - brw_miptree_release(&mt); - return NULL; - } - } - - if (!(flags & MIPTREE_CREATE_NO_AUX)) - brw_miptree_choose_aux_usage(brw, mt); - - return mt; -} - -struct brw_mipmap_tree * -brw_miptree_create(struct brw_context *brw, - GLenum target, - mesa_format format, - GLuint first_level, - GLuint last_level, - GLuint width0, - GLuint height0, - GLuint depth0, - GLuint num_samples, - enum brw_miptree_create_flags flags) -{ - assert(num_samples > 0); - - struct brw_mipmap_tree *mt = miptree_create( - brw, target, format, - first_level, last_level, - width0, height0, depth0, num_samples, - flags); - if (!mt) - return NULL; - - mt->offset = 0; - - /* Create the auxiliary surface up-front, except for CCS_D: since CCS_D can - * only compress clear color, we wait until an actual fast-clear to allocate - * it.
- */ - if (mt->aux_usage != ISL_AUX_USAGE_CCS_D && - !brw_miptree_alloc_aux(brw, mt)) { - mt->aux_usage = ISL_AUX_USAGE_NONE; - mt->supports_fast_clear = false; - } - - return mt; -} - -struct brw_mipmap_tree * -brw_miptree_create_for_bo(struct brw_context *brw, - struct brw_bo *bo, - mesa_format format, - uint32_t offset, - uint32_t width, - uint32_t height, - uint32_t depth, - int pitch, - enum isl_tiling tiling, - enum brw_miptree_create_flags flags) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_mipmap_tree *mt; - const GLenum target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D; - const GLenum base_format = _mesa_get_format_base_format(format); - - if ((base_format == GL_DEPTH_COMPONENT || - base_format == GL_DEPTH_STENCIL)) { - const mesa_format mt_fmt = (devinfo->ver < 6) ? format : - brw_depth_format_for_depthstencil_format(format); - mt = make_surface(brw, target, mt_fmt, - 0, 0, width, height, depth, 1, ISL_TILING_Y0_BIT, - mt_surf_usage(mt_fmt), - 0, pitch, bo); - if (!mt) - return NULL; - - brw_bo_reference(bo); - - if (!(flags & MIPTREE_CREATE_NO_AUX)) - brw_miptree_choose_aux_usage(brw, mt); - - return mt; - } else if (format == MESA_FORMAT_S_UINT8) { - mt = make_surface(brw, target, MESA_FORMAT_S_UINT8, - 0, 0, width, height, depth, 1, - ISL_TILING_W_BIT, - mt_surf_usage(MESA_FORMAT_S_UINT8), - 0, pitch, bo); - if (!mt) - return NULL; - - assert(bo->size >= mt->surf.size_B); - - brw_bo_reference(bo); - return mt; - } - - /* Nothing will be able to use this miptree with the BO if the offset isn't - * aligned. - */ - if (tiling != ISL_TILING_LINEAR) - assert(offset % 4096 == 0); - - /* miptrees can't handle negative pitch. If you need flipping of images, - * that's outside of the scope of the mt. - */ - assert(pitch >= 0); - - mt = make_surface(brw, target, format, - 0, 0, width, height, depth, 1, - 1lu << tiling, - mt_surf_usage(format), - 0, pitch, bo); - if (!mt) - return NULL; - - brw_bo_reference(bo); - mt->bo = bo; - mt->offset = offset; - - if (!(flags & MIPTREE_CREATE_NO_AUX)) { - brw_miptree_choose_aux_usage(brw, mt); - - /* Create the auxiliary surface up-front. CCS_D, on the other hand, can - * only compress clear color so we wait until an actual fast-clear to - * allocate it. - */ - if (mt->aux_usage != ISL_AUX_USAGE_CCS_D && - !brw_miptree_alloc_aux(brw, mt)) { - mt->aux_usage = ISL_AUX_USAGE_NONE; - mt->supports_fast_clear = false; - } - } - - return mt; -} - -static struct brw_mipmap_tree * -miptree_create_for_planar_image(struct brw_context *brw, - __DRIimage *image, GLenum target, - enum isl_tiling tiling) -{ - const struct brw_image_format *f = image->planar_format; - struct brw_mipmap_tree *planar_mt = NULL; - - for (int i = 0; i < f->nplanes; i++) { - const int index = f->planes[i].buffer_index; - const uint32_t dri_format = f->planes[i].dri_format; - const mesa_format format = driImageFormatToGLFormat(dri_format); - const uint32_t width = image->width >> f->planes[i].width_shift; - const uint32_t height = image->height >> f->planes[i].height_shift; - - /* Disable creation of the texture's aux buffers because the driver - * exposes no EGL API to manage them. That is, there is no API for - * resolving the aux buffer's content to the main buffer nor for - * invalidating the aux buffer's content. 
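The import-time restrictions asserted in brw_miptree_create_for_bo() above condense to a small predicate. A hypothetical sketch, not driver code:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Tiled surfaces must start on a 4 KiB page boundary within the BO,
     * and a miptree never carries a negative (flipped) pitch.
     */
    static bool
    bo_import_args_ok(uint32_t offset, int pitch, bool is_linear)
    {
       if (!is_linear && (offset % 4096) != 0)
          return false;   /* tiled: offset must be page-aligned */
       return pitch >= 0; /* flipping is outside the miptree's scope */
    }

    int
    main(void)
    {
       /* A tiled import at offset 8192 with a positive pitch is fine. */
       printf("%d\n", bo_import_args_ok(8192, 4096, false)); /* prints 1 */
       return 0;
    }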
- */ - struct brw_mipmap_tree *mt = - brw_miptree_create_for_bo(brw, image->bo, format, - image->offsets[index], - width, height, 1, - image->strides[index], - tiling, - MIPTREE_CREATE_NO_AUX); - if (mt == NULL) { - brw_miptree_release(&planar_mt); - return NULL; - } - - mt->target = target; - - if (i == 0) - planar_mt = mt; - else - planar_mt->plane[i - 1] = mt; - } - - planar_mt->drm_modifier = image->modifier; - - return planar_mt; -} - -static bool -create_ccs_buf_for_image(struct brw_context *brw, - __DRIimage *image, - struct brw_mipmap_tree *mt, - enum isl_aux_state initial_state) -{ - struct isl_surf temp_ccs_surf = {0,}; - - /* CCS is only supported for very simple miptrees */ - assert(image->aux_offset != 0 && image->aux_pitch != 0); - assert(image->tile_x == 0 && image->tile_y == 0); - assert(mt->surf.samples == 1); - assert(mt->surf.levels == 1); - assert(mt->surf.logical_level0_px.depth == 1); - assert(mt->surf.logical_level0_px.array_len == 1); - assert(mt->first_level == 0); - assert(mt->last_level == 0); - - /* We shouldn't already have a CCS */ - assert(!mt->aux_buf); - - if (!isl_surf_get_ccs_surf(&brw->isl_dev, &mt->surf, NULL, - &temp_ccs_surf, image->aux_pitch)) - return false; - - assert(image->aux_offset < image->bo->size); - assert(temp_ccs_surf.size_B <= image->bo->size - image->aux_offset); - - mt->aux_buf = calloc(sizeof(*mt->aux_buf), 1); - if (mt->aux_buf == NULL) - return false; - - mt->aux_state = create_aux_state_map(mt, initial_state); - if (!mt->aux_state) { - free(mt->aux_buf); - mt->aux_buf = NULL; - return false; - } - - /* On gfx10+ we start using an extra space in the aux buffer to store the - * indirect clear color. However, if we imported an image from the window - * system with CCS, we don't have the extra space at the end of the aux - * buffer. So create a new bo here that will store that clear color. - */ - if (brw->isl_dev.ss.clear_color_state_size > 0) { - mt->aux_buf->clear_color_bo = - brw_bo_alloc_tiled(brw->bufmgr, "clear_color_bo", - brw->isl_dev.ss.clear_color_state_size, - BRW_MEMZONE_OTHER, I915_TILING_NONE, 0, - BO_ALLOC_ZEROED); - if (!mt->aux_buf->clear_color_bo) { - free(mt->aux_buf); - mt->aux_buf = NULL; - return false; - } - } - - mt->aux_buf->bo = image->bo; - brw_bo_reference(image->bo); - - mt->aux_buf->offset = image->aux_offset; - mt->aux_buf->surf = temp_ccs_surf; - - return true; -} - -struct brw_mipmap_tree * -brw_miptree_create_for_dri_image(struct brw_context *brw, - __DRIimage *image, GLenum target, - mesa_format format, - bool allow_internal_aux) -{ - uint32_t bo_tiling, bo_swizzle; - brw_bo_get_tiling(image->bo, &bo_tiling, &bo_swizzle); - - const struct isl_drm_modifier_info *mod_info = - isl_drm_modifier_get_info(image->modifier); - - const enum isl_tiling tiling = - mod_info ? mod_info->tiling : isl_tiling_from_i915_tiling(bo_tiling); - - if (image->planar_format && image->planar_format->nplanes > 1) - return miptree_create_for_planar_image(brw, image, target, tiling); - - if (image->planar_format) - assert(image->planar_format->planes[0].dri_format == image->dri_format); - - if (!brw->ctx.TextureFormatSupported[format]) { - /* The texture storage paths in core Mesa detect if the driver does not - * support the user-requested format, and then searches for a - * fallback format. The DRIimage code bypasses core Mesa, though. So we - * do the fallbacks here for important formats. 
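A worked instance of the per-plane shift arithmetic in miptree_create_for_planar_image() above, assuming the conventional NV12 layout (full-resolution Y plane, 2x2-subsampled interleaved UV plane):

    #include <stdint.h>
    #include <stdio.h>

    /* Per-plane dimensions derive from the image size via shift amounts. */
    struct plane_def { unsigned width_shift, height_shift; };

    int
    main(void)
    {
       const struct plane_def nv12[2] = { {0, 0}, {1, 1} };
       const uint32_t width = 1920, height = 1080;

       for (int i = 0; i < 2; i++) {
          printf("plane %d: %ux%u\n", i,
                 width >> nv12[i].width_shift,
                 height >> nv12[i].height_shift);
       }
       return 0; /* plane 0: 1920x1080, plane 1: 960x540 */
    }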
- * - * We must support DRM_FOURCC_XBGR8888 textures because the Android - * framework produces HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces, which - * the Chrome OS compositor consumes as dma_buf EGLImages. - */ - format = _mesa_format_fallback_rgbx_to_rgba(format); - } - - if (!brw->ctx.TextureFormatSupported[format]) - return NULL; - - enum brw_miptree_create_flags mt_create_flags = 0; - - /* If this image comes in from a window system, we have different - * requirements than if it comes in via an EGL import operation. Window - * system images can use any form of auxiliary compression we wish because - * they get "flushed" before being handed off to the window system and we - * have the opportunity to do resolves. Non window-system images, on the - * other hand, have no resolve point so we can't have aux without a - * modifier. - */ - if (!allow_internal_aux) - mt_create_flags |= MIPTREE_CREATE_NO_AUX; - - /* If we have a modifier which specifies aux, don't create one yet */ - if (mod_info && mod_info->aux_usage != ISL_AUX_USAGE_NONE) - mt_create_flags |= MIPTREE_CREATE_NO_AUX; - - /* Disable creation of the texture's aux buffers because the driver exposes - * no EGL API to manage them. That is, there is no API for resolving the aux - * buffer's content to the main buffer nor for invalidating the aux buffer's - * content. - */ - struct brw_mipmap_tree *mt = - brw_miptree_create_for_bo(brw, image->bo, format, - image->offset, image->width, image->height, 1, - image->pitch, tiling, mt_create_flags); - if (mt == NULL) - return NULL; - - mt->target = target; - mt->level[0].level_x = image->tile_x; - mt->level[0].level_y = image->tile_y; - mt->drm_modifier = image->modifier; - - /* Per "OES_EGL_image" error reporting, we report GL_INVALID_OPERATION - * for EGL images from non-tile-aligned surfaces on gfx4 and earlier - * hardware, which has trouble resolving back to the destination image due - * to alignment issues. - */ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - if (!devinfo->has_surface_tile_offset) { - uint32_t draw_x, draw_y; - brw_miptree_get_tile_offsets(mt, 0, 0, &draw_x, &draw_y); - - if (draw_x != 0 || draw_y != 0) { - _mesa_error(&brw->ctx, GL_INVALID_OPERATION, __func__); - brw_miptree_release(&mt); - return NULL; - } - } - - if (mod_info && mod_info->aux_usage != ISL_AUX_USAGE_NONE) { - assert(mod_info->aux_usage == ISL_AUX_USAGE_CCS_E); - - mt->aux_usage = mod_info->aux_usage; - /* If we are a window system buffer, then we can support fast-clears - * even if the modifier doesn't support them by doing a partial resolve - * as part of the flush operation. - */ - mt->supports_fast_clear = - allow_internal_aux || mod_info->supports_clear_color; - - /* We don't know the actual state of the surface when we get it but we - * can make a pretty good guess based on the modifier. What we do know - * for sure is that it isn't in the AUX_INVALID state, so we just assume - * a worst case of compression. - */ - enum isl_aux_state initial_state = - isl_drm_modifier_get_default_aux_state(image->modifier); - - if (!create_ccs_buf_for_image(brw, image, mt, initial_state)) { - brw_miptree_release(&mt); - return NULL; - } - } - - /* Don't assume coherency for imported EGLImages. We don't know what - * external clients are going to do with them. They may scan them out. - */ - image->bo->cache_coherent = false; - - return mt; -} - -/** - * For a singlesample renderbuffer, this simply wraps the given BO with a - * miptree.
- * - * For a multisample renderbuffer, this wraps the window system's - * (singlesample) BO with a singlesample miptree attached to the - * brw_renderbuffer, then creates a multisample miptree attached to irb->mt - * that will contain the actual rendering (which is lazily resolved to - * irb->singlesample_mt). - */ -bool -brw_update_winsys_renderbuffer_miptree(struct brw_context *intel, - struct brw_renderbuffer *irb, - struct brw_mipmap_tree *singlesample_mt, - uint32_t width, uint32_t height, - uint32_t pitch) -{ - struct brw_mipmap_tree *multisample_mt = NULL; - struct gl_renderbuffer *rb = &irb->Base.Base; - mesa_format format = rb->Format; - const unsigned num_samples = MAX2(rb->NumSamples, 1); - - /* Only the front and back buffers, which are color buffers, are allocated - * through the image loader. - */ - assert(_mesa_get_format_base_format(format) == GL_RGB || - _mesa_get_format_base_format(format) == GL_RGBA); - - assert(singlesample_mt); - - if (num_samples == 1) { - brw_miptree_release(&irb->mt); - irb->mt = singlesample_mt; - - assert(!irb->singlesample_mt); - } else { - brw_miptree_release(&irb->singlesample_mt); - irb->singlesample_mt = singlesample_mt; - - if (!irb->mt || - irb->mt->surf.logical_level0_px.width != width || - irb->mt->surf.logical_level0_px.height != height) { - multisample_mt = brw_miptree_create_for_renderbuffer(intel, - format, - width, - height, - num_samples); - if (!multisample_mt) - goto fail; - - irb->need_downsample = false; - brw_miptree_release(&irb->mt); - irb->mt = multisample_mt; - } - } - return true; - -fail: - brw_miptree_release(&irb->mt); - return false; -} - -struct brw_mipmap_tree* -brw_miptree_create_for_renderbuffer(struct brw_context *brw, - mesa_format format, - uint32_t width, - uint32_t height, - uint32_t num_samples) -{ - struct brw_mipmap_tree *mt; - uint32_t depth = 1; - GLenum target = num_samples > 1 ? 
GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; - - mt = brw_miptree_create(brw, target, format, 0, 0, - width, height, depth, num_samples, - MIPTREE_CREATE_BUSY); - if (!mt) - goto fail; - - return mt; - -fail: - brw_miptree_release(&mt); - return NULL; -} - -void -brw_miptree_reference(struct brw_mipmap_tree **dst, - struct brw_mipmap_tree *src) -{ - if (*dst == src) - return; - - brw_miptree_release(dst); - - if (src) { - src->refcount++; - DBG("%s %p refcount now %d\n", __func__, src, src->refcount); - } - - *dst = src; -} - -static void -brw_miptree_aux_buffer_free(struct brw_miptree_aux_buffer *aux_buf) -{ - if (aux_buf == NULL) - return; - - brw_bo_unreference(aux_buf->bo); - brw_bo_unreference(aux_buf->clear_color_bo); - - free(aux_buf); -} - -void -brw_miptree_release(struct brw_mipmap_tree **mt) -{ - if (!*mt) - return; - - DBG("%s %p refcount will be %d\n", __func__, *mt, (*mt)->refcount - 1); - if (--(*mt)->refcount <= 0) { - GLuint i; - - DBG("%s deleting %p\n", __func__, *mt); - - brw_bo_unreference((*mt)->bo); - brw_miptree_release(&(*mt)->stencil_mt); - brw_miptree_release(&(*mt)->shadow_mt); - brw_miptree_aux_buffer_free((*mt)->aux_buf); - free_aux_state_map((*mt)->aux_state); - - brw_miptree_release(&(*mt)->plane[0]); - brw_miptree_release(&(*mt)->plane[1]); - - for (i = 0; i < MAX_TEXTURE_LEVELS; i++) { - free((*mt)->level[i].slice); - } - - free(*mt); - } - *mt = NULL; -} - - -void -brw_get_image_dims(struct gl_texture_image *image, - int *width, int *height, int *depth) -{ - switch (image->TexObject->Target) { - case GL_TEXTURE_1D_ARRAY: - /* For a 1D Array texture the OpenGL API will treat the image height as - * the number of array slices. For Intel hardware, we treat the 1D array - * as a 2D Array with a height of 1. So, here we want to swap image - * height and depth. - */ - assert(image->Depth == 1); - *width = image->Width; - *height = 1; - *depth = image->Height; - break; - case GL_TEXTURE_CUBE_MAP: - /* For Cube maps, the mesa/main api layer gives us a depth of 1 even - * though we really have 6 slices. - */ - assert(image->Depth == 1); - *width = image->Width; - *height = image->Height; - *depth = 6; - break; - default: - *width = image->Width; - *height = image->Height; - *depth = image->Depth; - break; - } -} - -/** - * Can the image be pulled into a unified mipmap tree? This mirrors - * the completeness test in a lot of ways. - * - * Not sure whether I want to pass gl_texture_image here. - */ -bool -brw_miptree_match_image(struct brw_mipmap_tree *mt, - struct gl_texture_image *image) -{ - struct brw_texture_image *brw_image = brw_texture_image(image); - GLuint level = brw_image->base.Base.Level; - int width, height, depth; - - /* glTexImage* choose the texture object based on the target passed in, and - * objects can't change targets over their lifetimes, so this should be - * true. - */ - assert(image->TexObject->Target == mt->target); - - mesa_format mt_format = mt->format; - if (mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT && mt->stencil_mt) - mt_format = MESA_FORMAT_Z24_UNORM_S8_UINT; - if (mt->format == MESA_FORMAT_Z_FLOAT32 && mt->stencil_mt) - mt_format = MESA_FORMAT_Z32_FLOAT_S8X24_UINT; - - if (_mesa_get_srgb_format_linear(image->TexFormat) != - _mesa_get_srgb_format_linear(mt_format)) - return false; - - brw_get_image_dims(image, &width, &height, &depth); - - if (mt->target == GL_TEXTURE_CUBE_MAP) - depth = 6; - - if (level >= mt->surf.levels) - return false; - - const unsigned level_depth = - mt->surf.dim == ISL_SURF_DIM_3D ? 
- minify(mt->surf.logical_level0_px.depth, level) : - mt->surf.logical_level0_px.array_len; - - return width == minify(mt->surf.logical_level0_px.width, level) && - height == minify(mt->surf.logical_level0_px.height, level) && - depth == level_depth && - MAX2(image->NumSamples, 1) == mt->surf.samples; -} - -void -brw_miptree_get_image_offset(const struct brw_mipmap_tree *mt, - GLuint level, GLuint slice, - GLuint *x, GLuint *y) -{ - if (level == 0 && slice == 0) { - *x = mt->level[0].level_x; - *y = mt->level[0].level_y; - return; - } - - uint32_t x_offset_sa, y_offset_sa, z_offset_sa, array_offset; - - /* The miptree itself can have an offset only if it represents a single - * slice in an imported buffer object. - * See brw_miptree_create_for_dri_image(). - */ - assert(mt->level[0].level_x == 0); - assert(mt->level[0].level_y == 0); - - /* The given level is relative to level zero, while the miptree may - * represent just a subset of all levels starting from 'first_level'. - */ - assert(level >= mt->first_level); - level -= mt->first_level; - - const unsigned z = mt->surf.dim == ISL_SURF_DIM_3D ? slice : 0; - slice = mt->surf.dim == ISL_SURF_DIM_3D ? 0 : slice; - isl_surf_get_image_offset_el(&mt->surf, level, slice, z, - &x_offset_sa, &y_offset_sa, - &z_offset_sa, &array_offset); - - *x = x_offset_sa; - *y = y_offset_sa; - assert(z_offset_sa == 0); - assert(array_offset == 0); -} - -/** - * Compute the offset (in bytes) from the start of the BO to the given x - * and y coordinate. For tiled BOs, the caller must ensure that x and y are - * multiples of the tile size. - */ -uint32_t -brw_miptree_get_aligned_offset(const struct brw_mipmap_tree *mt, - uint32_t x, uint32_t y) -{ - int cpp = mt->cpp; - uint32_t pitch = mt->surf.row_pitch_B; - - switch (mt->surf.tiling) { - default: - unreachable("not reached"); - case ISL_TILING_LINEAR: - return y * pitch + x * cpp; - case ISL_TILING_X: - assert((x % (512 / cpp)) == 0); - assert((y % 8) == 0); - return y * pitch + x / (512 / cpp) * 4096; - case ISL_TILING_Y0: - assert((x % (128 / cpp)) == 0); - assert((y % 32) == 0); - return y * pitch + x / (128 / cpp) * 4096; - } -} - -/** - * Rendering with tiled buffers requires that the base address of the buffer - * be aligned to a page boundary. For renderbuffers, and sometimes with - * textures, we may want the surface to point at a texture image level that - * isn't at a page boundary. - * - * This function returns an appropriately-aligned base offset - * according to the tiling restrictions, plus any required x/y offset - * from there.
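The X-tiling case of brw_miptree_get_aligned_offset() above can be rehearsed standalone. A sketch with hypothetical names, assuming the 512-byte-by-8-row X-tile geometry that the asserts encode:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* A tile-aligned (x, y) maps to y * pitch plus 4096 bytes for every
     * whole tile the x coordinate spans.
     */
    static uint32_t
    x_tiled_aligned_offset(uint32_t x, uint32_t y, uint32_t pitch, uint32_t cpp)
    {
       assert(x % (512 / cpp) == 0);
       assert(y % 8 == 0);
       return y * pitch + x / (512 / cpp) * 4096;
    }

    int
    main(void)
    {
       /* With 4 bytes per pixel, tiles are 128 px wide: (256, 16) with a
        * 4096-byte pitch lands two tiles in and sixteen rows down.
        */
       printf("%u\n", x_tiled_aligned_offset(256, 16, 4096, 4));
       return 0; /* prints 73728 = 16 * 4096 + 2 * 4096 */
    }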
- */ -uint32_t -brw_miptree_get_tile_offsets(const struct brw_mipmap_tree *mt, - GLuint level, GLuint slice, - uint32_t *tile_x, - uint32_t *tile_y) -{ - uint32_t x, y; - uint32_t mask_x, mask_y; - - isl_get_tile_masks(mt->surf.tiling, mt->cpp, &mask_x, &mask_y); - brw_miptree_get_image_offset(mt, level, slice, &x, &y); - - *tile_x = x & mask_x; - *tile_y = y & mask_y; - - return brw_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y); -} - -static void -brw_miptree_copy_slice_sw(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - unsigned src_level, unsigned src_layer, - struct brw_mipmap_tree *dst_mt, - unsigned dst_level, unsigned dst_layer, - unsigned width, unsigned height) -{ - void *src, *dst; - ptrdiff_t src_stride, dst_stride; - const unsigned cpp = (isl_format_get_layout(dst_mt->surf.format)->bpb / 8); - - brw_miptree_map(brw, src_mt, - src_level, src_layer, - 0, 0, - width, height, - GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT, - &src, &src_stride); - - brw_miptree_map(brw, dst_mt, - dst_level, dst_layer, - 0, 0, - width, height, - GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | - BRW_MAP_DIRECT_BIT, - &dst, &dst_stride); - - DBG("sw blit %s mt %p %p/%"PRIdPTR" -> %s mt %p %p/%"PRIdPTR" (%dx%d)\n", - _mesa_get_format_name(src_mt->format), - src_mt, src, src_stride, - _mesa_get_format_name(dst_mt->format), - dst_mt, dst, dst_stride, - width, height); - - int row_size = cpp * width; - if (src_stride == row_size && - dst_stride == row_size) { - memcpy(dst, src, row_size * height); - } else { - for (int i = 0; i < height; i++) { - memcpy(dst, src, row_size); - dst += dst_stride; - src += src_stride; - } - } - - brw_miptree_unmap(brw, dst_mt, dst_level, dst_layer); - brw_miptree_unmap(brw, src_mt, src_level, src_layer); - - /* Don't forget to copy the stencil data over, too. We could have skipped - * passing BRW_MAP_DIRECT_BIT, but that would have meant brw_miptree_map - * shuffling the two data sources in/out of temporary storage instead of - * the direct mapping we get this way. - */ - if (dst_mt->stencil_mt) { - assert(src_mt->stencil_mt); - brw_miptree_copy_slice_sw(brw, - src_mt->stencil_mt, src_level, src_layer, - dst_mt->stencil_mt, dst_level, dst_layer, - width, height); - } -} - -void -brw_miptree_copy_slice(struct brw_context *brw, - struct brw_mipmap_tree *src_mt, - unsigned src_level, unsigned src_layer, - struct brw_mipmap_tree *dst_mt, - unsigned dst_level, unsigned dst_layer) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - mesa_format format = src_mt->format; - unsigned width = minify(src_mt->surf.phys_level0_sa.width, - src_level - src_mt->first_level); - unsigned height = minify(src_mt->surf.phys_level0_sa.height, - src_level - src_mt->first_level); - - assert(src_layer < get_num_phys_layers(&src_mt->surf, - src_level - src_mt->first_level)); - - assert(_mesa_get_srgb_format_linear(src_mt->format) == - _mesa_get_srgb_format_linear(dst_mt->format)); - - DBG("validate blit mt %s %p %d,%d -> mt %s %p %d,%d (%dx%d)\n", - _mesa_get_format_name(src_mt->format), - src_mt, src_level, src_layer, - _mesa_get_format_name(dst_mt->format), - dst_mt, dst_level, dst_layer, - width, height); - - if (devinfo->ver >= 6) { - /* On gfx6 and above, we just use blorp. It's faster than the blitter - * and can handle everything without software fallbacks. 
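The stride-aware copy at the heart of brw_miptree_copy_slice_sw() is a reusable pattern: collapse to a single memcpy only when both surfaces are tightly packed. A standalone sketch:

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    static void
    copy_rows(char *dst, ptrdiff_t dst_stride,
              const char *src, ptrdiff_t src_stride,
              size_t row_size, unsigned height)
    {
       if (src_stride == (ptrdiff_t)row_size &&
           dst_stride == (ptrdiff_t)row_size) {
          memcpy(dst, src, row_size * height); /* one tight copy */
          return;
       }
       for (unsigned i = 0; i < height; i++) { /* padded: row by row */
          memcpy(dst, src, row_size);
          dst += dst_stride;
          src += src_stride;
       }
    }

    int
    main(void)
    {
       const char src[2][8] = { "row0", "row1" }; /* stride 8 */
       char dst[2][16] = { { 0 } };               /* stride 16 */
       copy_rows(&dst[0][0], 16, &src[0][0], 8, 5, 2); /* 5 bytes incl. NUL */
       puts(dst[0]);
       puts(dst[1]);
       return 0;
    }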
- */ - brw_blorp_copy_miptrees(brw, - src_mt, src_level, src_layer, - dst_mt, dst_level, dst_layer, - 0, 0, 0, 0, width, height); - - if (src_mt->stencil_mt) { - assert(dst_mt->stencil_mt); - brw_blorp_copy_miptrees(brw, - src_mt->stencil_mt, src_level, src_layer, - dst_mt->stencil_mt, dst_level, dst_layer, - 0, 0, 0, 0, width, height); - } - return; - } - - if (dst_mt->compressed) { - unsigned int i, j; - _mesa_get_format_block_size(dst_mt->format, &i, &j); - height = ALIGN_NPOT(height, j) / j; - width = ALIGN_NPOT(width, i) / i; - } - - /* Gfx4-5 doesn't support separate stencil */ - assert(!src_mt->stencil_mt); - - uint32_t dst_x, dst_y, src_x, src_y; - brw_miptree_get_image_offset(dst_mt, dst_level, dst_layer, &dst_x, &dst_y); - brw_miptree_get_image_offset(src_mt, src_level, src_layer, &src_x, &src_y); - - DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n", - _mesa_get_format_name(src_mt->format), - src_mt, src_x, src_y, src_mt->surf.row_pitch_B, - _mesa_get_format_name(dst_mt->format), - dst_mt, dst_x, dst_y, dst_mt->surf.row_pitch_B, - width, height); - - if (!brw_miptree_blit(brw, - src_mt, src_level, src_layer, 0, 0, false, - dst_mt, dst_level, dst_layer, 0, 0, false, - width, height, COLOR_LOGICOP_COPY)) { - perf_debug("miptree validate blit for %s failed\n", - _mesa_get_format_name(format)); - - brw_miptree_copy_slice_sw(brw, - src_mt, src_level, src_layer, - dst_mt, dst_level, dst_layer, - width, height); - } -} - -/** - * Copies the image's current data to the given miptree, and associates that - * miptree with the image. - */ -void -brw_miptree_copy_teximage(struct brw_context *brw, - struct brw_texture_image *brw_image, - struct brw_mipmap_tree *dst_mt) -{ - struct brw_mipmap_tree *src_mt = brw_image->mt; - struct brw_texture_object *intel_obj = - brw_texture_object(brw_image->base.Base.TexObject); - int level = brw_image->base.Base.Level; - const unsigned face = brw_image->base.Base.Face; - unsigned start_layer, end_layer; - - if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY) { - assert(face == 0); - assert(brw_image->base.Base.Height); - start_layer = 0; - end_layer = brw_image->base.Base.Height - 1; - } else if (face > 0) { - start_layer = face; - end_layer = face; - } else { - assert(brw_image->base.Base.Depth); - start_layer = 0; - end_layer = brw_image->base.Base.Depth - 1; - } - - for (unsigned i = start_layer; i <= end_layer; i++) { - brw_miptree_copy_slice(brw, src_mt, level, i, dst_mt, level, i); - } - - brw_miptree_reference(&brw_image->mt, dst_mt); - intel_obj->needs_validate = true; -} - -static struct brw_miptree_aux_buffer * -brw_alloc_aux_buffer(struct brw_context *brw, - const struct isl_surf *aux_surf, - bool wants_memset, - uint8_t memset_value) -{ - struct brw_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1); - if (!buf) - return false; - - uint64_t size = aux_surf->size_B; - - const bool has_indirect_clear = brw->isl_dev.ss.clear_color_state_size > 0; - if (has_indirect_clear) { - /* On CNL+, instead of setting the clear color in the SURFACE_STATE, we - * will set a pointer to a dword somewhere that contains the color. So, - * allocate the space for the clear color value here on the aux buffer. 
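A sketch of the resulting BO layout, using a hypothetical aux_layout struct for illustration: the aux data sits at the front and the indirect clear color dwords are appended at the end, with clear_color_offset recording where they start.

    #include <inttypes.h>
    #include <stdio.h>

    struct aux_layout {
       uint64_t size;               /* total BO size to allocate */
       uint64_t clear_color_offset; /* equals the aux surface size when appended */
    };

    static struct aux_layout
    aux_bo_layout(uint64_t aux_surf_size, uint32_t clear_color_state_size)
    {
       struct aux_layout l = { aux_surf_size, 0 };
       if (clear_color_state_size > 0) { /* indirect clear color in use */
          l.clear_color_offset = l.size;
          l.size += clear_color_state_size;
       }
       return l;
    }

    int
    main(void)
    {
       /* e.g. a 64 KiB CCS with a 64-byte clear color block appended */
       struct aux_layout l = aux_bo_layout(65536, 64);
       printf("size=%" PRIu64 " clear_color_offset=%" PRIu64 "\n",
              l.size, l.clear_color_offset);
       return 0; /* size=65600 clear_color_offset=65536 */
    }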
- */ - buf->clear_color_offset = size; - size += brw->isl_dev.ss.clear_color_state_size; - } - - /* If the buffer needs to be initialized (requiring it to be mapped to CPU - * space for writing immediately), do not use the GPU-access flag, which - * can cause an unnecessary delay if the backing pages happen to have just - * been used by the GPU. - */ - const bool alloc_zeroed = wants_memset && memset_value == 0; - const bool needs_memset = - !alloc_zeroed && (wants_memset || has_indirect_clear); - const uint32_t alloc_flags = - alloc_zeroed ? BO_ALLOC_ZEROED : (needs_memset ? 0 : BO_ALLOC_BUSY); - - /* ISL has a stricter set of alignment rules than the DRM allocator. - * Therefore one can pass the ISL dimensions in terms of bytes instead of - * trying to recalculate them based on different format block sizes. - */ - buf->bo = brw_bo_alloc_tiled(brw->bufmgr, "aux-miptree", size, - BRW_MEMZONE_OTHER, I915_TILING_Y, - aux_surf->row_pitch_B, alloc_flags); - if (!buf->bo) { - free(buf); - return NULL; - } - - /* Initialize the BO to the desired value */ - if (needs_memset) { - assert(!(alloc_flags & BO_ALLOC_BUSY)); - - void *map = brw_bo_map(brw, buf->bo, MAP_WRITE | MAP_RAW); - if (map == NULL) { - brw_miptree_aux_buffer_free(buf); - return NULL; - } - - /* Memset the aux_surf portion of the BO. */ - if (wants_memset) - memset(map, memset_value, aux_surf->size_B); - - /* Zero the indirect clear color to match ::fast_clear_color. */ - if (has_indirect_clear) { - memset((char *)map + buf->clear_color_offset, 0, - brw->isl_dev.ss.clear_color_state_size); - } - - brw_bo_unmap(buf->bo); - } - - if (has_indirect_clear) { - buf->clear_color_bo = buf->bo; - brw_bo_reference(buf->clear_color_bo); - } - - buf->surf = *aux_surf; - - return buf; -} - - -/** - * Helper for brw_miptree_alloc_aux() that sets - * \c mt->level[level].has_hiz. Return true if and only if - * \c has_hiz was set. - */ -static bool -brw_miptree_level_enable_hiz(struct brw_context *brw, - struct brw_mipmap_tree *mt, - uint32_t level) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - assert(mt->aux_buf); - assert(mt->surf.size_B > 0); - - if (devinfo->verx10 >= 75) { - uint32_t width = minify(mt->surf.phys_level0_sa.width, level); - uint32_t height = minify(mt->surf.phys_level0_sa.height, level); - - /* Disable HiZ for LOD > 0 unless the width is 8-aligned - * and the height is 4-aligned. This allows our HiZ support - * to fulfill Haswell restrictions for HiZ ops. For LOD == 0, - * we can grow the width & height to allow the HiZ op to - * force the proper size alignments. - */ - if (level > 0 && ((width & 7) || (height & 3))) { - DBG("mt %p level %d: HiZ DISABLED\n", mt, level); - return false; - } - } - - DBG("mt %p level %d: HiZ enabled\n", mt, level); - mt->level[level].has_hiz = true; - return true; -} - - -/** - * Allocate the initial aux surface for a miptree based on mt->aux_usage. - * - * Since MCS, HiZ, and CCS_E can compress more than just clear color, we - * create the auxiliary surfaces up-front. CCS_D, on the other hand, can only - * compress clear color so we wait until an actual fast-clear to allocate it. - */ -bool -brw_miptree_alloc_aux(struct brw_context *brw, struct brw_mipmap_tree *mt) -{ - assert(mt->aux_buf == NULL); - - /* Get the aux buf allocation parameters for this miptree.
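The Haswell LOD-alignment rule from brw_miptree_level_enable_hiz() above reduces to a one-line predicate. A hypothetical standalone sketch:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* LOD 0 can always use HiZ (its size can be grown to fit); higher
     * LODs need an 8-aligned width and a 4-aligned height.
     */
    static bool
    level_can_use_hiz_hsw(uint32_t level, uint32_t width, uint32_t height)
    {
       return level == 0 || ((width & 7) == 0 && (height & 3) == 0);
    }

    int
    main(void)
    {
       /* A 100x33 LOD 1 fails the rule; LOD 0 never does. */
       printf("%d %d\n", level_can_use_hiz_hsw(1, 100, 33),
              level_can_use_hiz_hsw(0, 100, 33)); /* prints 0 1 */
       return 0;
    }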
*/ - enum isl_aux_state initial_state; - uint8_t memset_value; - struct isl_surf aux_surf = {0,}; - bool aux_surf_ok = false; - - switch (mt->aux_usage) { - case ISL_AUX_USAGE_NONE: - aux_surf.size_B = 0; - aux_surf_ok = true; - break; - case ISL_AUX_USAGE_HIZ: - initial_state = ISL_AUX_STATE_AUX_INVALID; - memset_value = 0; - aux_surf_ok = isl_surf_get_hiz_surf(&brw->isl_dev, &mt->surf, &aux_surf); - break; - case ISL_AUX_USAGE_MCS: - /* From the Ivy Bridge PRM, Vol 2 Part 1 p326: - * - * When MCS buffer is enabled and bound to MSRT, it is required that - * it is cleared prior to any rendering. - * - * Since we don't use the MCS buffer for any purpose other than - * rendering, it makes sense to just clear it immediately upon - * allocation. - * - * Note: the clear value for MCS buffers is all 1's, so we memset to - * 0xff. - */ - initial_state = ISL_AUX_STATE_CLEAR; - memset_value = 0xFF; - aux_surf_ok = isl_surf_get_mcs_surf(&brw->isl_dev, &mt->surf, &aux_surf); - break; - case ISL_AUX_USAGE_CCS_D: - case ISL_AUX_USAGE_CCS_E: - /* When CCS_E is used, we need to ensure that the CCS starts off in a - * valid state. From the Sky Lake PRM, "MCS Buffer for Render - * Target(s)": - * - * "If Software wants to enable Color Compression without Fast - * clear, Software needs to initialize MCS with zeros." - * - * A CCS value of 0 indicates that the corresponding block is in the - * pass-through state which is what we want. - * - * For CCS_D, do the same thing. On gfx9+, this avoids having any - * undefined bits in the aux buffer. - */ - initial_state = ISL_AUX_STATE_PASS_THROUGH; - memset_value = 0; - aux_surf_ok = - isl_surf_get_ccs_surf(&brw->isl_dev, &mt->surf, NULL, &aux_surf, 0); - break; - - default: - unreachable("Invalid aux usage"); - } - - /* We should have a valid aux_surf. */ - if (!aux_surf_ok) - return false; - - /* No work is needed for a zero-sized auxiliary buffer. */ - if (aux_surf.size_B == 0) - return true; - - /* Create the aux_state for the auxiliary buffer. */ - mt->aux_state = create_aux_state_map(mt, initial_state); - if (mt->aux_state == NULL) - return false; - - /* Allocate the auxiliary buffer. */ - const bool needs_memset = initial_state != ISL_AUX_STATE_AUX_INVALID; - mt->aux_buf = brw_alloc_aux_buffer(brw, &aux_surf, needs_memset, - memset_value); - if (mt->aux_buf == NULL) { - free_aux_state_map(mt->aux_state); - mt->aux_state = NULL; - return false; - } - - /* Perform aux_usage-specific initialization. */ - if (mt->aux_usage == ISL_AUX_USAGE_HIZ) { - for (unsigned level = mt->first_level; level <= mt->last_level; ++level) - brw_miptree_level_enable_hiz(brw, mt, level); - } - - return true; -} - - -/** - * Can the miptree sample using the hiz buffer? - */ -bool -brw_miptree_sample_with_hiz(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (!devinfo->has_sample_with_hiz) { - return false; - } - - if (!mt->aux_buf) { - return false; - } - - for (unsigned level = 0; level < mt->surf.levels; ++level) { - if (!brw_miptree_level_has_hiz(mt, level)) - return false; - } - - /* From the BDW PRM (Volume 2d: Command Reference: Structures - * RENDER_SURFACE_STATE.AuxiliarySurfaceMode): - * - * "If this field is set to AUX_HIZ, Number of Multisamples must be - * MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D. - * - * There is no such blurb for 1D textures, but there is sufficient evidence - * that this is broken on SKL+. 
- */ - return (mt->surf.samples == 1 && - mt->target != GL_TEXTURE_3D && - mt->target != GL_TEXTURE_1D /* gfx9+ restriction */); -} - -static bool -level_has_aux(const struct brw_mipmap_tree *mt, uint32_t level) -{ - return isl_aux_usage_has_hiz(mt->aux_usage) ? - brw_miptree_level_has_hiz(mt, level) : - mt->aux_usage != ISL_AUX_USAGE_NONE && mt->aux_buf; -} - -/** - * Does the miptree slice have HiZ enabled? - */ -bool -brw_miptree_level_has_hiz(const struct brw_mipmap_tree *mt, uint32_t level) -{ - brw_miptree_check_level_layer(mt, level, 0); - return mt->level[level].has_hiz; -} - -static inline uint32_t -miptree_level_range_length(const struct brw_mipmap_tree *mt, - uint32_t start_level, uint32_t num_levels) -{ - assert(start_level >= mt->first_level); - assert(start_level <= mt->last_level); - - if (num_levels == INTEL_REMAINING_LEVELS) - num_levels = mt->last_level - start_level + 1; - /* Check for overflow */ - assert(start_level + num_levels >= start_level); - assert(start_level + num_levels <= mt->last_level + 1); - - return num_levels; -} - -static inline uint32_t -miptree_layer_range_length(const struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t num_layers) -{ - assert(level <= mt->last_level); - - const uint32_t total_num_layers = brw_get_num_logical_layers(mt, level); - assert(start_layer < total_num_layers); - if (num_layers == INTEL_REMAINING_LAYERS) - num_layers = total_num_layers - start_layer; - /* Check for overflow */ - assert(start_layer + num_layers >= start_layer); - assert(start_layer + num_layers <= total_num_layers); - - return num_layers; -} - -bool -brw_miptree_has_color_unresolved(const struct brw_mipmap_tree *mt, - unsigned start_level, unsigned num_levels, - unsigned start_layer, unsigned num_layers) -{ - assert(_mesa_is_format_color_format(mt->format)); - - if (!mt->aux_buf) - return false; - - /* Clamp the level range to fit the miptree */ - num_levels = miptree_level_range_length(mt, start_level, num_levels); - - for (uint32_t l = 0; l < num_levels; l++) { - const uint32_t level = start_level + l; - const uint32_t level_layers = - miptree_layer_range_length(mt, level, start_layer, num_layers); - for (unsigned a = 0; a < level_layers; a++) { - enum isl_aux_state aux_state = - brw_miptree_get_aux_state(mt, level, start_layer + a); - assert(aux_state != ISL_AUX_STATE_AUX_INVALID); - if (aux_state != ISL_AUX_STATE_PASS_THROUGH) - return true; - } - } - - return false; -} - -static void -brw_miptree_check_color_resolve(const struct brw_context *brw, - const struct brw_mipmap_tree *mt, - unsigned level, unsigned layer) -{ - if (!mt->aux_buf) - return; - - /* Fast color clear is supported for mipmapped surfaces only on Gfx8+. */ - assert(brw->screen->devinfo.ver >= 8 || - (level == 0 && mt->first_level == 0 && mt->last_level == 0)); - - /* Compression of arrayed msaa surfaces is supported. */ - if (mt->surf.samples > 1) - return; - - /* Fast color clear is supported for non-msaa arrays only on Gfx8+.
*/ - assert(brw->screen->devinfo.ver >= 8 || - (layer == 0 && - mt->surf.logical_level0_px.depth == 1 && - mt->surf.logical_level0_px.array_len == 1)); - - (void)level; - (void)layer; -} - -void -brw_miptree_prepare_access(struct brw_context *brw, - struct brw_mipmap_tree *mt, - uint32_t start_level, uint32_t num_levels, - uint32_t start_layer, uint32_t num_layers, - enum isl_aux_usage aux_usage, - bool fast_clear_supported) -{ - const uint32_t clamped_levels = - miptree_level_range_length(mt, start_level, num_levels); - for (uint32_t l = 0; l < clamped_levels; l++) { - const uint32_t level = start_level + l; - if (!level_has_aux(mt, level)) - continue; - - const uint32_t level_layers = - miptree_layer_range_length(mt, level, start_layer, num_layers); - for (uint32_t a = 0; a < level_layers; a++) { - const uint32_t layer = start_layer + a; - const enum isl_aux_state aux_state = - brw_miptree_get_aux_state(mt, level, layer); - const enum isl_aux_op aux_op = - isl_aux_prepare_access(aux_state, aux_usage, fast_clear_supported); - - if (aux_op == ISL_AUX_OP_NONE) { - /* Nothing to do here. */ - } else if (isl_aux_usage_has_mcs(mt->aux_usage)) { - assert(aux_op == ISL_AUX_OP_PARTIAL_RESOLVE); - brw_blorp_mcs_partial_resolve(brw, mt, layer, 1); - } else if (isl_aux_usage_has_hiz(mt->aux_usage)) { - brw_hiz_exec(brw, mt, level, layer, 1, aux_op); - } else { - assert(isl_aux_usage_has_ccs(mt->aux_usage)); - brw_miptree_check_color_resolve(brw, mt, level, layer); - brw_blorp_resolve_color(brw, mt, level, layer, aux_op); - } - - const enum isl_aux_state new_state = - isl_aux_state_transition_aux_op(aux_state, mt->aux_usage, aux_op); - brw_miptree_set_aux_state(brw, mt, level, layer, 1, new_state); - } - } -} - -void -brw_miptree_finish_write(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t num_layers, - enum isl_aux_usage aux_usage) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (mt->format == MESA_FORMAT_S_UINT8 && devinfo->ver <= 7) { - mt->shadow_needs_update = true; - } else if (brw_miptree_has_etc_shadow(brw, mt)) { - mt->shadow_needs_update = true; - } - - if (!level_has_aux(mt, level)) - return; - - const uint32_t level_layers = - miptree_layer_range_length(mt, level, start_layer, num_layers); - - for (uint32_t a = 0; a < level_layers; a++) { - const uint32_t layer = start_layer + a; - const enum isl_aux_state aux_state = - brw_miptree_get_aux_state(mt, level, layer); - const enum isl_aux_state new_aux_state = - isl_aux_state_transition_write(aux_state, aux_usage, false); - brw_miptree_set_aux_state(brw, mt, level, layer, 1, new_aux_state); - } -} - -enum isl_aux_state -brw_miptree_get_aux_state(const struct brw_mipmap_tree *mt, - uint32_t level, uint32_t layer) -{ - brw_miptree_check_level_layer(mt, level, layer); - - if (_mesa_is_format_color_format(mt->format)) { - assert(mt->aux_buf != NULL); - assert(mt->surf.samples == 1 || - mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY); - } else if (mt->format == MESA_FORMAT_S_UINT8) { - unreachable("Cannot get aux state for stencil"); - } else { - assert(brw_miptree_level_has_hiz(mt, level)); - } - - return mt->aux_state[level][layer]; -} - -void -brw_miptree_set_aux_state(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t num_layers, - enum isl_aux_state aux_state) -{ - num_layers = miptree_layer_range_length(mt, level, start_layer, num_layers); - - if (_mesa_is_format_color_format(mt->format)) { - 
assert(mt->aux_buf != NULL); - assert(mt->surf.samples == 1 || - mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY); - } else if (mt->format == MESA_FORMAT_S_UINT8) { - unreachable("Cannot set aux state for stencil"); - } else { - assert(brw_miptree_level_has_hiz(mt, level)); - } - - for (unsigned a = 0; a < num_layers; a++) { - if (mt->aux_state[level][start_layer + a] != aux_state) { - mt->aux_state[level][start_layer + a] = aux_state; - brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE; - } - } -} - -/* On Gfx9, color buffers may be compressed by the hardware (lossless - * compression). There are, however, format restrictions, and care needs to - * be taken that the sampler engine is capable of re-interpreting a buffer - * with a format different from the one it was originally written with. - * - * For example, SRGB formats are not compressible and the sampler engine isn't - * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying - * color buffer needs to be resolved so that the sampling surface can be - * sampled as non-compressed (i.e., without the auxiliary MCS buffer being - * set). - */ -static bool -can_texture_with_ccs(struct brw_context *brw, - struct brw_mipmap_tree *mt, - enum isl_format view_format) -{ - if (mt->aux_usage != ISL_AUX_USAGE_CCS_E) - return false; - - if (!format_ccs_e_compat_with_miptree(&brw->screen->devinfo, - mt, view_format)) { - perf_debug("Incompatible sampling format (%s) for rbc (%s)\n", - isl_format_get_name(view_format), - _mesa_get_format_name(mt->format)); - return false; - } - - return true; -} - -enum isl_aux_usage -brw_miptree_texture_aux_usage(struct brw_context *brw, - struct brw_mipmap_tree *mt, - enum isl_format view_format, - enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits) -{ - assert(brw->screen->devinfo.ver == 9 || astc5x5_wa_bits == 0); - - /* On gfx9, ASTC 5x5 textures cannot live in the sampler cache alongside - * CCS or HiZ compressed textures. See gfx9_apply_astc5x5_wa_flush() for - * details. - */ - if ((astc5x5_wa_bits & GFX9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) && - mt->aux_usage != ISL_AUX_USAGE_MCS) - return ISL_AUX_USAGE_NONE; - - switch (mt->aux_usage) { - case ISL_AUX_USAGE_HIZ: - if (brw_miptree_sample_with_hiz(brw, mt)) - return ISL_AUX_USAGE_HIZ; - break; - - case ISL_AUX_USAGE_MCS: - return ISL_AUX_USAGE_MCS; - - case ISL_AUX_USAGE_CCS_D: - case ISL_AUX_USAGE_CCS_E: - if (!mt->aux_buf) { - assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D); - return ISL_AUX_USAGE_NONE; - } - - /* If we don't have any unresolved color, report an aux usage of - * ISL_AUX_USAGE_NONE. This way, texturing won't even look at the - * aux surface and we can save some bandwidth. - */ - if (!brw_miptree_has_color_unresolved(mt, 0, INTEL_REMAINING_LEVELS, - 0, INTEL_REMAINING_LAYERS)) - return ISL_AUX_USAGE_NONE; - - if (can_texture_with_ccs(brw, mt, view_format)) - return ISL_AUX_USAGE_CCS_E; - break; - - default: - break; - } - - return ISL_AUX_USAGE_NONE; -} - -static bool -isl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b) -{ - /* On gfx8 and earlier, the hardware was only capable of handling 0/1 clear - * values, so sRGB curve application was a no-op for all fast-clearable - * formats. - * - * On gfx9+, the hardware supports arbitrary clear values. For sRGB clear - * values, the hardware interprets the floats, not as what would be - * returned from the sampler (or written by the shader), but as being - * between format conversion and sRGB curve application.
This means that - * we can switch between sRGB and UNORM without having to whack the clear - * color. - */ - return isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b); -} - -void -brw_miptree_prepare_texture(struct brw_context *brw, - struct brw_mipmap_tree *mt, - enum isl_format view_format, - uint32_t start_level, uint32_t num_levels, - uint32_t start_layer, uint32_t num_layers, - enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits) -{ - enum isl_aux_usage aux_usage = - brw_miptree_texture_aux_usage(brw, mt, view_format, astc5x5_wa_bits); - - bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE; - - /* Clear color is specified as ints or floats and the conversion is done by - * the sampler. If we have a texture view, we would have to perform the - * clear color conversion manually. Just disable clear color. - */ - if (!isl_formats_are_fast_clear_compatible(mt->surf.format, view_format)) - clear_supported = false; - - brw_miptree_prepare_access(brw, mt, start_level, num_levels, - start_layer, num_layers, - aux_usage, clear_supported); -} - -void -brw_miptree_prepare_image(struct brw_context *brw, struct brw_mipmap_tree *mt) -{ - /* The data port doesn't understand any compression */ - brw_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS, - 0, INTEL_REMAINING_LAYERS, - ISL_AUX_USAGE_NONE, false); -} - -enum isl_aux_usage -brw_miptree_render_aux_usage(struct brw_context *brw, - struct brw_mipmap_tree *mt, - enum isl_format render_format, - bool blend_enabled, - bool draw_aux_disabled) -{ - struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (draw_aux_disabled) - return ISL_AUX_USAGE_NONE; - - switch (mt->aux_usage) { - case ISL_AUX_USAGE_MCS: - assert(mt->aux_buf); - return ISL_AUX_USAGE_MCS; - - case ISL_AUX_USAGE_CCS_D: - case ISL_AUX_USAGE_CCS_E: - if (!mt->aux_buf) { - assert(mt->aux_usage == ISL_AUX_USAGE_CCS_D); - return ISL_AUX_USAGE_NONE; - } - - /* gfx9+ hardware technically supports non-0/1 clear colors with sRGB - * formats. However, there are issues with blending where it doesn't - * properly apply the sRGB curve to the clear color when blending. 
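To illustrate the check below with assumed values: blending into an sRGB render target whose fast-clear color is (0.5, 0.5, 0.5, 1.0) would blend against a clear value that never passed through the sRGB curve (linear 0.5 encodes to roughly 0.735), so the driver falls back to ISL_AUX_USAGE_NONE; a 0/1 clear color such as (0, 0, 0, 1) is a fixed point of the curve and keeps compression.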
- */ - if (devinfo->ver >= 9 && blend_enabled && - isl_format_is_srgb(render_format) && - !isl_color_value_is_zero_one(mt->fast_clear_color, render_format)) - return ISL_AUX_USAGE_NONE; - - if (mt->aux_usage == ISL_AUX_USAGE_CCS_E && - format_ccs_e_compat_with_miptree(&brw->screen->devinfo, - mt, render_format)) - return ISL_AUX_USAGE_CCS_E; - - /* Otherwise, we have to fall back to CCS_D */ - return ISL_AUX_USAGE_CCS_D; - - default: - return ISL_AUX_USAGE_NONE; - } -} - -void -brw_miptree_prepare_render(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t layer_count, - enum isl_aux_usage aux_usage) -{ - brw_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count, - aux_usage, aux_usage != ISL_AUX_USAGE_NONE); -} - -void -brw_miptree_finish_render(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t layer_count, - enum isl_aux_usage aux_usage) -{ - assert(_mesa_is_format_color_format(mt->format)); - - brw_miptree_finish_write(brw, mt, level, start_layer, layer_count, - aux_usage); -} - -void -brw_miptree_prepare_depth(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t layer_count) -{ - brw_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count, - mt->aux_usage, mt->aux_buf != NULL); -} - -void -brw_miptree_finish_depth(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t layer_count, - bool depth_written) -{ - if (depth_written) { - brw_miptree_finish_write(brw, mt, level, start_layer, layer_count, - mt->aux_usage); - } -} - -void -brw_miptree_prepare_external(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE; - bool supports_fast_clear = false; - - const struct isl_drm_modifier_info *mod_info = - isl_drm_modifier_get_info(mt->drm_modifier); - - if (mod_info && mod_info->aux_usage != ISL_AUX_USAGE_NONE) { - /* CCS_E is the only supported aux for external images and it's only - * supported on very simple images. - */ - assert(mod_info->aux_usage == ISL_AUX_USAGE_CCS_E); - assert(_mesa_is_format_color_format(mt->format)); - assert(mt->first_level == 0 && mt->last_level == 0); - assert(mt->surf.logical_level0_px.depth == 1); - assert(mt->surf.logical_level0_px.array_len == 1); - assert(mt->surf.samples == 1); - assert(mt->aux_buf != NULL); - - aux_usage = mod_info->aux_usage; - supports_fast_clear = mod_info->supports_clear_color; - } - - brw_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS, - 0, INTEL_REMAINING_LAYERS, - aux_usage, supports_fast_clear); -} - -void -brw_miptree_finish_external(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - if (!mt->aux_buf) - return; - - /* We don't know the actual aux state of the aux surface. The previous - * owner could have given it to us in a number of different states. - * Because we don't know the aux state, we reset the aux state to the - * least common denominator of possible valid states. - */ - enum isl_aux_state default_aux_state = - isl_drm_modifier_get_default_aux_state(mt->drm_modifier); - assert(mt->last_level == mt->first_level); - brw_miptree_set_aux_state(brw, mt, 0, 0, INTEL_REMAINING_LAYERS, - default_aux_state); -} - -/** - * Make it possible to share the BO backing the given miptree with another - * process or another miptree. 
- * - * Fast color clears are unsafe with shared buffers, so we need to resolve - * and then discard the aux buffer, if present. We also set aux_usage to - * ISL_AUX_USAGE_NONE to ensure that no aux buffer gets allocated in the - * future. - * - * HiZ is similarly unsafe with shared buffers. - */ -void -brw_miptree_make_shareable(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - /* MCS buffers are also used for multisample buffers, but we can't resolve - * away a multisample MCS buffer because it's an integral part of how the - * pixel data is stored. Fortunately this code path should never be - * reached for multisample buffers. - */ - assert(mt->surf.msaa_layout == ISL_MSAA_LAYOUT_NONE || - mt->surf.samples == 1); - - brw_miptree_prepare_access(brw, mt, 0, INTEL_REMAINING_LEVELS, - 0, INTEL_REMAINING_LAYERS, - ISL_AUX_USAGE_NONE, false); - - if (mt->aux_buf) { - brw_miptree_aux_buffer_free(mt->aux_buf); - mt->aux_buf = NULL; - - /* Make future calls of brw_miptree_level_has_hiz() return false. */ - for (uint32_t l = mt->first_level; l <= mt->last_level; ++l) { - mt->level[l].has_hiz = false; - } - - free(mt->aux_state); - mt->aux_state = NULL; - brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE; - } - - mt->aux_usage = ISL_AUX_USAGE_NONE; - mt->supports_fast_clear = false; -} - - -/** - * \brief Get pointer offset into stencil buffer. - * - * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we - * must decode the tile's layout in software. - * - * See - * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile - * Format. - * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm - * - * Even though the returned offset is always positive, the return type is - * signed due to - * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137 - * mesa: Fix return type of _mesa_get_format_bytes() (#37351) - */ -static intptr_t -brw_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled) -{ - uint32_t tile_size = 4096; - uint32_t tile_width = 64; - uint32_t tile_height = 64; - uint32_t row_size = 64 * stride / 2; /* Two rows are interleaved. */ - - uint32_t tile_x = x / tile_width; - uint32_t tile_y = y / tile_height; - - /* The byte's address relative to the tile's base address.
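A worked example of the interleaving below (hypothetical coordinates, swizzling ignored): the byte at (x = 17, y = 5) lies in tile (0, 0) with byte_x = 17 and byte_y = 5, so

   u = 512 * (17 / 8)      /* 1024 */
     + 64 * (5 / 8)        /*    0 */
     + 32 * ((5 / 4) % 2)  /*   32 */
     + 16 * ((17 / 4) % 2) /*    0 */
     + 8 * ((5 / 2) % 2)   /*    0 */
     + 4 * ((17 / 2) % 2)  /*    0 */
     + 2 * (5 % 2)         /*    2 */
     + 1 * (17 % 2)        /*    1 */
     = 1059

bytes from the tile's base.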
*/ - uint32_t byte_x = x % tile_width; - uint32_t byte_y = y % tile_height; - - uintptr_t u = tile_y * row_size - + tile_x * tile_size - + 512 * (byte_x / 8) - + 64 * (byte_y / 8) - + 32 * ((byte_y / 4) % 2) - + 16 * ((byte_x / 4) % 2) - + 8 * ((byte_y / 2) % 2) - + 4 * ((byte_x / 2) % 2) - + 2 * (byte_y % 2) - + 1 * (byte_x % 2); - - if (swizzled) { - /* adjust for bit6 swizzling */ - if (((byte_x / 8) % 2) == 1) { - if (((byte_y / 8) % 2) == 0) { - u += 64; - } else { - u -= 64; - } - } - } - - return u; -} - -void -brw_miptree_updownsample(struct brw_context *brw, - struct brw_mipmap_tree *src, - struct brw_mipmap_tree *dst) -{ - unsigned src_w = src->surf.logical_level0_px.width; - unsigned src_h = src->surf.logical_level0_px.height; - unsigned dst_w = dst->surf.logical_level0_px.width; - unsigned dst_h = dst->surf.logical_level0_px.height; - - brw_blorp_blit_miptrees(brw, - src, 0 /* level */, 0 /* layer */, - src->format, SWIZZLE_XYZW, - dst, 0 /* level */, 0 /* layer */, dst->format, - 0, 0, src_w, src_h, - 0, 0, dst_w, dst_h, - GL_NEAREST, false, false /*mirror x, y*/, - false, false); - - if (src->stencil_mt) { - src_w = src->stencil_mt->surf.logical_level0_px.width; - src_h = src->stencil_mt->surf.logical_level0_px.height; - dst_w = dst->stencil_mt->surf.logical_level0_px.width; - dst_h = dst->stencil_mt->surf.logical_level0_px.height; - - brw_blorp_blit_miptrees(brw, - src->stencil_mt, 0 /* level */, 0 /* layer */, - src->stencil_mt->format, SWIZZLE_XYZW, - dst->stencil_mt, 0 /* level */, 0 /* layer */, - dst->stencil_mt->format, - 0, 0, src_w, src_h, - 0, 0, dst_w, dst_h, - GL_NEAREST, false, false /*mirror x, y*/, - false, false /* decode/encode srgb */); - } -} - -void -brw_update_r8stencil(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - assert(devinfo->ver >= 7); - struct brw_mipmap_tree *src = - mt->format == MESA_FORMAT_S_UINT8 ? mt : mt->stencil_mt; - if (!src || devinfo->ver >= 8) - return; - - assert(src->surf.size_B > 0); - - if (!mt->shadow_mt) { - assert(devinfo->ver > 6); /* Handle MIPTREE_LAYOUT_GFX6_HIZ_STENCIL */ - mt->shadow_mt = make_surface( - brw, - src->target, - MESA_FORMAT_R_UINT8, - src->first_level, src->last_level, - src->surf.logical_level0_px.width, - src->surf.logical_level0_px.height, - src->surf.dim == ISL_SURF_DIM_3D ? - src->surf.logical_level0_px.depth : - src->surf.logical_level0_px.array_len, - src->surf.samples, - ISL_TILING_Y0_BIT, - ISL_SURF_USAGE_TEXTURE_BIT, - BO_ALLOC_BUSY, 0, NULL); - assert(mt->shadow_mt); - } - - if (src->shadow_needs_update == false) - return; - - struct brw_mipmap_tree *dst = mt->shadow_mt; - - for (int level = src->first_level; level <= src->last_level; level++) { - const unsigned depth = src->surf.dim == ISL_SURF_DIM_3D ? 
- minify(src->surf.phys_level0_sa.depth, level) : - src->surf.phys_level0_sa.array_len; - - for (unsigned layer = 0; layer < depth; layer++) { - brw_blorp_copy_miptrees(brw, - src, level, layer, - dst, level, layer, - 0, 0, 0, 0, - minify(src->surf.logical_level0_px.width, - level), - minify(src->surf.logical_level0_px.height, - level)); - } - } - - brw_cache_flush_for_read(brw, dst->bo); - src->shadow_needs_update = false; -} - -static void * -brw_miptree_map_raw(struct brw_context *brw, - struct brw_mipmap_tree *mt, - GLbitfield mode) -{ - struct brw_bo *bo = mt->bo; - - if (brw_batch_references(&brw->batch, bo)) - brw_batch_flush(brw); - - return brw_bo_map(brw, bo, mode); -} - -static void -brw_miptree_unmap_raw(struct brw_mipmap_tree *mt) -{ - brw_bo_unmap(mt->bo); -} - -static void -brw_miptree_unmap_map(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, unsigned int slice) -{ - brw_miptree_unmap_raw(mt); -} - -static void -brw_miptree_map_map(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, unsigned int slice) -{ - unsigned int bw, bh; - void *base; - unsigned int image_x, image_y; - intptr_t x = map->x; - intptr_t y = map->y; - - /* For compressed formats, the stride is the number of bytes per - * row of blocks. brw_miptree_get_image_offset() already does - * the divide. - */ - _mesa_get_format_block_size(mt->format, &bw, &bh); - assert(y % bh == 0); - assert(x % bw == 0); - y /= bh; - x /= bw; - - brw_miptree_access_raw(brw, mt, level, slice, - map->mode & GL_MAP_WRITE_BIT); - - base = brw_miptree_map_raw(brw, mt, map->mode); - - if (base == NULL) - map->ptr = NULL; - else { - base += mt->offset; - - /* Note that in the case of cube maps, the caller must have passed the - * slice number referencing the face. - */ - brw_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); - x += image_x; - y += image_y; - - map->stride = mt->surf.row_pitch_B; - map->ptr = base + y * map->stride + x * mt->cpp; - } - - DBG("%s: %d,%d %dx%d from mt %p (%s) " - "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __func__, - map->x, map->y, map->w, map->h, - mt, _mesa_get_format_name(mt->format), - x, y, map->ptr, map->stride); - - map->unmap = brw_miptree_unmap_map; -} - -static void -brw_miptree_unmap_blit(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, - unsigned int slice) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - - brw_miptree_unmap_raw(map->linear_mt); - - if (map->mode & GL_MAP_WRITE_BIT) { - if (devinfo->ver >= 6) { - brw_blorp_copy_miptrees(brw, map->linear_mt, 0, 0, - mt, level, slice, - 0, 0, map->x, map->y, map->w, map->h); - } else { - bool ok = brw_miptree_copy(brw, - map->linear_mt, 0, 0, 0, 0, - mt, level, slice, map->x, map->y, - map->w, map->h); - WARN_ONCE(!ok, "Failed to blit from linear temporary mapping"); - } - } - - brw_miptree_release(&map->linear_mt); -} - -/* Compute extent parameters for use with tiled_memcpy functions. - * xs are in units of bytes and ys are in units of strides. 
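A worked example with assumed values: mapping a 16x8 texel window at (8, 4) of a compressed format with 4x4 blocks and 8 bytes per block (mt->cpp counts bytes per block for compressed formats), with the slice at element offset (0, 0), yields x1_B = (8/4) * 8 = 16, y1_el = 4/4 = 1, x2_B = DIV_ROUND_UP(24, 4) * 8 = 48, and y2_el = DIV_ROUND_UP(12, 4) = 3.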
- */ -static inline void -tile_extents(struct brw_mipmap_tree *mt, struct brw_miptree_map *map, - unsigned int level, unsigned int slice, unsigned int *x1_B, - unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el) -{ - unsigned int block_width, block_height; - unsigned int x0_el, y0_el; - - _mesa_get_format_block_size(mt->format, &block_width, &block_height); - - assert(map->x % block_width == 0); - assert(map->y % block_height == 0); - - brw_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el); - *x1_B = (map->x / block_width + x0_el) * mt->cpp; - *y1_el = map->y / block_height + y0_el; - *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp; - *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el; -} - -static void -brw_miptree_unmap_tiled_memcpy(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, - unsigned int slice) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (map->mode & GL_MAP_WRITE_BIT) { - unsigned int x1, x2, y1, y2; - tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); - - char *dst = brw_miptree_map_raw(brw, mt, map->mode | MAP_RAW); - dst += mt->offset; - - isl_memcpy_linear_to_tiled( - x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch_B, map->stride, - devinfo->has_bit6_swizzle, mt->surf.tiling, ISL_MEMCPY); - - brw_miptree_unmap_raw(mt); - } - align_free(map->buffer); - map->buffer = map->ptr = NULL; -} - -/** - * Determine which copy function to use for the given format combination - * - * The only two possible copy functions which are ever returned are a - * direct memcpy and an RGBA <-> BGRA copy function. Since RGBA -> BGRA and - * BGRA -> RGBA are exactly the same operation (and memcpy is obviously - * symmetric), it doesn't matter whether the copy is from the tiled image - * to the untiled or vice versa. The copy function required is the same in - * either case, so this function can be used for both directions. - * - * \param[in] tiledFormat The format of the tiled image - * \param[in] format The GL format of the client data - * \param[in] type The GL type of the client data - * \param[out] cpp Number of bytes per pixel - * - * \return the isl_memcpy_type to use for the copy, or ISL_MEMCPY_INVALID - * if the format and type combination is invalid - */ -isl_memcpy_type -brw_miptree_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type, - uint32_t *cpp) -{ - if (type == GL_UNSIGNED_INT_8_8_8_8_REV && - !(format == GL_RGBA || format == GL_BGRA)) - return ISL_MEMCPY_INVALID; /* Invalid type/format combination */ - - if ((tiledFormat == MESA_FORMAT_L_UNORM8 && format == GL_LUMINANCE) || - (tiledFormat == MESA_FORMAT_A_UNORM8 && format == GL_ALPHA)) { - *cpp = 1; - return ISL_MEMCPY; - } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) || - (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) || - (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) || - (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) { - *cpp = 4; - if (format == GL_BGRA) { - return ISL_MEMCPY; - } else if (format == GL_RGBA) { - return ISL_MEMCPY_BGRA8; - } - } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) || - (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) || - (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) || - (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) { - *cpp = 4; - if (format == GL_BGRA) { - /* Copying from RGBA to BGRA is the same as BGRA to RGBA, so we can - * use the same function.
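For instance (hypothetical calls): reading an R8G8B8A8_UNORM miptree back as GL_BGRA selects ISL_MEMCPY_BGRA8, exactly as uploading GL_RGBA client data into a B8G8R8A8_UNORM miptree does, because the red/blue channel swap is its own inverse.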
- */ - return ISL_MEMCPY_BGRA8; - } else if (format == GL_RGBA) { - return ISL_MEMCPY; - } - } - - return ISL_MEMCPY_INVALID; -} - -static void -brw_miptree_map_tiled_memcpy(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, unsigned int slice) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - brw_miptree_access_raw(brw, mt, level, slice, - map->mode & GL_MAP_WRITE_BIT); - - unsigned int x1, x2, y1, y2; - tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); - map->stride = ALIGN(_mesa_format_row_stride(mt->format, map->w), 16); - - /* The tiling and detiling functions require that the linear buffer - * has proper 16-byte alignment (that is, its `x0` is 16-byte - * aligned). Here we over-allocate the linear buffer by enough - * bytes to get the proper alignment. - */ - map->buffer = align_malloc(map->stride * (y2 - y1) + (x1 & 0xf), 16); - map->ptr = (char *)map->buffer + (x1 & 0xf); - assert(map->buffer); - - if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { - char *src = brw_miptree_map_raw(brw, mt, map->mode | MAP_RAW); - src += mt->offset; - - const isl_memcpy_type copy_type = -#if defined(USE_SSE41) - cpu_has_sse4_1 ? ISL_MEMCPY_STREAMING_LOAD : -#endif - ISL_MEMCPY; - - isl_memcpy_tiled_to_linear( - x1, x2, y1, y2, map->ptr, src, map->stride, - mt->surf.row_pitch_B, devinfo->has_bit6_swizzle, mt->surf.tiling, - copy_type); - - brw_miptree_unmap_raw(mt); - } - - map->unmap = brw_miptree_unmap_tiled_memcpy; -} - -static void -brw_miptree_map_blit(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, unsigned int slice) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - map->linear_mt = make_surface(brw, GL_TEXTURE_2D, mt->format, - 0, 0, map->w, map->h, 1, 1, - ISL_TILING_LINEAR_BIT, - ISL_SURF_USAGE_RENDER_TARGET_BIT | - ISL_SURF_USAGE_TEXTURE_BIT, - 0, 0, NULL); - - if (!map->linear_mt) { - fprintf(stderr, "Failed to allocate blit temporary\n"); - goto fail; - } - map->stride = map->linear_mt->surf.row_pitch_B; - - /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no - * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless - * invalidate is set, since we'll be writing the whole rectangle from our - * temporary buffer back out. - */ - if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { - if (devinfo->ver >= 6) { - brw_blorp_copy_miptrees(brw, mt, level, slice, - map->linear_mt, 0, 0, - map->x, map->y, 0, 0, map->w, map->h); - } else { - if (!brw_miptree_copy(brw, - mt, level, slice, map->x, map->y, - map->linear_mt, 0, 0, 0, 0, - map->w, map->h)) { - fprintf(stderr, "Failed to blit\n"); - goto fail; - } - } - } - - map->ptr = brw_miptree_map_raw(brw, map->linear_mt, map->mode); - - DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__, - map->x, map->y, map->w, map->h, - mt, _mesa_get_format_name(mt->format), - level, slice, map->ptr, map->stride); - - map->unmap = brw_miptree_unmap_blit; - return; - -fail: - brw_miptree_release(&map->linear_mt); - map->ptr = NULL; - map->stride = 0; -} - -/** - * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA. 
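As a sketch of the alignment trick used below, with assumed numbers: if row_pitch_B is a multiple of 16 and the mapped window's first byte sits 12 bytes past a 16-byte boundary, then every source row is misaligned by the same 12 bytes; offsetting map->ptr by those same 12 bytes gives source and destination rows identical alignment, which is what the streaming copy relies on.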
- */ -#if defined(USE_SSE41) -static void -brw_miptree_unmap_movntdqa(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, - unsigned int slice) -{ - align_free(map->buffer); - map->buffer = NULL; - map->ptr = NULL; -} - -static void -brw_miptree_map_movntdqa(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, unsigned int slice) -{ - assert(map->mode & GL_MAP_READ_BIT); - assert(!(map->mode & GL_MAP_WRITE_BIT)); - - brw_miptree_access_raw(brw, mt, level, slice, false); - - DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__, - map->x, map->y, map->w, map->h, - mt, _mesa_get_format_name(mt->format), - level, slice, map->ptr, map->stride); - - /* Map the original image */ - uint32_t image_x; - uint32_t image_y; - brw_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); - image_x += map->x; - image_y += map->y; - - void *src = brw_miptree_map_raw(brw, mt, map->mode); - if (!src) - return; - - src += mt->offset; - - src += image_y * mt->surf.row_pitch_B; - src += image_x * mt->cpp; - - /* Due to the pixel offsets for the particular image being mapped, our - * src pointer may not be 16-byte aligned. However, if the pitch is - * divisible by 16, then the amount by which it's misaligned will remain - * consistent from row to row. - */ - assert((mt->surf.row_pitch_B % 16) == 0); - const int misalignment = ((uintptr_t) src) & 15; - - /* Create an untiled temporary buffer for the mapping. */ - const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w); - - map->stride = ALIGN(misalignment + width_bytes, 16); - - map->buffer = align_malloc(map->stride * map->h, 16); - /* Offset the destination so it has the same misalignment as src. */ - map->ptr = map->buffer + misalignment; - - assert((((uintptr_t) map->ptr) & 15) == misalignment); - - for (uint32_t y = 0; y < map->h; y++) { - void *dst_ptr = map->ptr + y * map->stride; - void *src_ptr = src + y * mt->surf.row_pitch_B; - - _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes); - } - - brw_miptree_unmap_raw(mt); - - map->unmap = brw_miptree_unmap_movntdqa; -} -#endif - -static void -brw_miptree_unmap_s8(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, - unsigned int slice) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (map->mode & GL_MAP_WRITE_BIT) { - unsigned int image_x, image_y; - uint8_t *untiled_s8_map = map->ptr; - uint8_t *tiled_s8_map = brw_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT); - - brw_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); - - for (uint32_t y = 0; y < map->h; y++) { - for (uint32_t x = 0; x < map->w; x++) { - ptrdiff_t offset = brw_offset_S8(mt->surf.row_pitch_B, - image_x + x + map->x, - image_y + y + map->y, - devinfo->has_bit6_swizzle); - tiled_s8_map[offset] = untiled_s8_map[y * map->w + x]; - } - } - - brw_miptree_unmap_raw(mt); - } - - free(map->buffer); -} - -static void -brw_miptree_map_s8(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, unsigned int slice) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - map->stride = map->w; - map->buffer = map->ptr = malloc(map->stride * map->h); - if (!map->buffer) - return; - - brw_miptree_access_raw(brw, mt, level, slice, - map->mode & GL_MAP_WRITE_BIT); - - /* One of either READ_BIT or WRITE_BIT or both is set. 
READ_BIT implies no - * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless - * invalidate is set, since we'll be writing the whole rectangle from our - * temporary buffer back out. - */ - if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { - uint8_t *untiled_s8_map = map->ptr; - uint8_t *tiled_s8_map = brw_miptree_map_raw(brw, mt, GL_MAP_READ_BIT); - unsigned int image_x, image_y; - - brw_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); - - for (uint32_t y = 0; y < map->h; y++) { - for (uint32_t x = 0; x < map->w; x++) { - ptrdiff_t offset = brw_offset_S8(mt->surf.row_pitch_B, - x + image_x + map->x, - y + image_y + map->y, - devinfo->has_bit6_swizzle); - untiled_s8_map[y * map->w + x] = tiled_s8_map[offset]; - } - } - - brw_miptree_unmap_raw(mt); - - DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__, - map->x, map->y, map->w, map->h, - mt, map->x + image_x, map->y + image_y, map->ptr, map->stride); - } else { - DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__, - map->x, map->y, map->w, map->h, - mt, map->ptr, map->stride); - } - - map->unmap = brw_miptree_unmap_s8; -} - -/** - * Mapping functions for packed depth/stencil miptrees backed by real separate - * miptrees for depth and stencil. - * - * On gfx7, and to support HiZ pre-gfx7, we have to have the stencil buffer - * separate from the depth buffer. Yet at the GL API level, we have to expose - * packed depth/stencil textures and FBO attachments, and Mesa core expects to - * be able to map that memory for texture storage and glReadPixels-type - * operations. We give Mesa core that access by mallocing a temporary and - * copying the data between the actual backing store and the temporary. - */ -static void -brw_miptree_unmap_depthstencil(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, - unsigned int slice) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_mipmap_tree *z_mt = mt; - struct brw_mipmap_tree *s_mt = mt->stencil_mt; - bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32; - - if (map->mode & GL_MAP_WRITE_BIT) { - uint32_t *packed_map = map->ptr; - uint8_t *s_map = brw_miptree_map_raw(brw, s_mt, GL_MAP_WRITE_BIT); - uint32_t *z_map = brw_miptree_map_raw(brw, z_mt, GL_MAP_WRITE_BIT); - unsigned int s_image_x, s_image_y; - unsigned int z_image_x, z_image_y; - - brw_miptree_get_image_offset(s_mt, level, slice, - &s_image_x, &s_image_y); - brw_miptree_get_image_offset(z_mt, level, slice, - &z_image_x, &z_image_y); - - for (uint32_t y = 0; y < map->h; y++) { - for (uint32_t x = 0; x < map->w; x++) { - ptrdiff_t s_offset = brw_offset_S8(s_mt->surf.row_pitch_B, - x + s_image_x + map->x, - y + s_image_y + map->y, - devinfo->has_bit6_swizzle); - ptrdiff_t z_offset = ((y + z_image_y + map->y) * - (z_mt->surf.row_pitch_B / 4) + - (x + z_image_x + map->x)); - - if (map_z32f_x24s8) { - z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0]; - s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1]; - } else { - uint32_t packed = packed_map[y * map->w + x]; - s_map[s_offset] = packed >> 24; - z_map[z_offset] = packed; - } - } - } - - brw_miptree_unmap_raw(s_mt); - brw_miptree_unmap_raw(z_mt); - - DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n", - __func__, - map->x, map->y, map->w, map->h, - z_mt, _mesa_get_format_name(z_mt->format), - map->x + z_image_x, map->y + z_image_y, - s_mt, map->x + s_image_x, map->y + s_image_y, - map->ptr, map->stride); - } - - free(map->buffer); -} 
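Both directions of the depth/stencil interleave above reduce to one byte of stencil in the top bits and 24 bits of depth below it. A minimal standalone sketch of that packing (hypothetical helpers, not part of the driver):

#include <stdint.h>

static inline uint32_t
pack_z24s8(uint32_t z24, uint8_t s)
{
   /* Stencil lives in bits 31:24, depth in bits 23:0. */
   return ((uint32_t)s << 24) | (z24 & 0x00ffffff);
}

static inline void
unpack_z24s8(uint32_t packed, uint32_t *z24, uint8_t *s)
{
   *s = packed >> 24;
   *z24 = packed & 0x00ffffff;
}

The MESA_FORMAT_Z_FLOAT32 path sidesteps this packing by keeping depth and stencil in two separate 32-bit slots of the temporary buffer.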
- -static void -brw_miptree_map_depthstencil(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, unsigned int slice) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_mipmap_tree *z_mt = mt; - struct brw_mipmap_tree *s_mt = mt->stencil_mt; - bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32; - int packed_bpp = map_z32f_x24s8 ? 8 : 4; - - map->stride = map->w * packed_bpp; - map->buffer = map->ptr = malloc(map->stride * map->h); - if (!map->buffer) - return; - - brw_miptree_access_raw(brw, z_mt, level, slice, - map->mode & GL_MAP_WRITE_BIT); - brw_miptree_access_raw(brw, s_mt, level, slice, - map->mode & GL_MAP_WRITE_BIT); - - /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no - * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless - * invalidate is set, since we'll be writing the whole rectangle from our - * temporary buffer back out. - */ - if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { - uint32_t *packed_map = map->ptr; - uint8_t *s_map = brw_miptree_map_raw(brw, s_mt, GL_MAP_READ_BIT); - uint32_t *z_map = brw_miptree_map_raw(brw, z_mt, GL_MAP_READ_BIT); - unsigned int s_image_x, s_image_y; - unsigned int z_image_x, z_image_y; - - brw_miptree_get_image_offset(s_mt, level, slice, - &s_image_x, &s_image_y); - brw_miptree_get_image_offset(z_mt, level, slice, - &z_image_x, &z_image_y); - - for (uint32_t y = 0; y < map->h; y++) { - for (uint32_t x = 0; x < map->w; x++) { - int map_x = map->x + x, map_y = map->y + y; - ptrdiff_t s_offset = brw_offset_S8(s_mt->surf.row_pitch_B, - map_x + s_image_x, - map_y + s_image_y, - devinfo->has_bit6_swizzle); - ptrdiff_t z_offset = ((map_y + z_image_y) * - (z_mt->surf.row_pitch_B / 4) + - (map_x + z_image_x)); - uint8_t s = s_map[s_offset]; - uint32_t z = z_map[z_offset]; - - if (map_z32f_x24s8) { - packed_map[(y * map->w + x) * 2 + 0] = z; - packed_map[(y * map->w + x) * 2 + 1] = s; - } else { - packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff); - } - } - } - - brw_miptree_unmap_raw(s_mt); - brw_miptree_unmap_raw(z_mt); - - DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n", - __func__, - map->x, map->y, map->w, map->h, - z_mt, map->x + z_image_x, map->y + z_image_y, - s_mt, map->x + s_image_x, map->y + s_image_y, - map->ptr, map->stride); - } else { - DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__, - map->x, map->y, map->w, map->h, - mt, map->ptr, map->stride); - } - - map->unmap = brw_miptree_unmap_depthstencil; -} - -/** - * Create and attach a map to the miptree at (level, slice). Return the - * attached map. - */ -static struct brw_miptree_map* -brw_miptree_attach_map(struct brw_mipmap_tree *mt, - unsigned int level, - unsigned int slice, - unsigned int x, - unsigned int y, - unsigned int w, - unsigned int h, - GLbitfield mode) -{ - struct brw_miptree_map *map = calloc(1, sizeof(*map)); - - if (!map) - return NULL; - - assert(mt->level[level].slice[slice].map == NULL); - mt->level[level].slice[slice].map = map; - - map->mode = mode; - map->x = x; - map->y = y; - map->w = w; - map->h = h; - - return map; -} - -/** - * Release the map at (level, slice). 
- */ -static void -brw_miptree_release_map(struct brw_mipmap_tree *mt, - unsigned int level, - unsigned int slice) -{ - struct brw_miptree_map **map; - - map = &mt->level[level].slice[slice].map; - free(*map); - *map = NULL; -} - -static bool -can_blit_slice(struct brw_mipmap_tree *mt, - const struct brw_miptree_map *map) -{ - /* See brw_miptree_blit() for details on the 32k pitch limit. */ - const unsigned src_blt_pitch = brw_miptree_blt_pitch(mt); - const unsigned dst_blt_pitch = ALIGN(map->w * mt->cpp, 64); - return src_blt_pitch < 32768 && dst_blt_pitch < 32768; -} - -static bool -use_blitter_to_map(struct brw_context *brw, - struct brw_mipmap_tree *mt, - const struct brw_miptree_map *map) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->has_llc && - /* It's probably not worth swapping to the blit ring because of - * all the overhead involved. - */ - !(map->mode & GL_MAP_WRITE_BIT) && - !mt->compressed && - (mt->surf.tiling == ISL_TILING_X || - /* Prior to Sandybridge, the blitter can't handle Y tiling */ - (devinfo->ver >= 6 && mt->surf.tiling == ISL_TILING_Y0) || - /* Fast copy blit on skl+ supports all tiling formats. */ - devinfo->ver >= 9) && - can_blit_slice(mt, map)) - return true; - - if (mt->surf.tiling != ISL_TILING_LINEAR && - mt->bo->size >= brw->max_gtt_map_object_size) { - assert(can_blit_slice(mt, map)); - return true; - } - - return false; -} - -/** - * Parameter \a out_stride has type ptrdiff_t not because the buffer stride may - * exceed 32 bits but to diminish the likelihood of subtle bugs caused by - * overflow in pointer arithmetic. - * - * If you call this function and use \a out_stride, then you're doing pointer - * arithmetic on \a out_ptr. The type of \a out_stride doesn't prevent all - * bugs. The caller must still take care to avoid 32-bit overflow errors in - * all arithmetic expressions that contain buffer offsets and pixel sizes, - * which usually have type uint32_t or GLuint.
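A concrete overflow case with assumed sizes: a 16384-texel-wide RGBA32F surface has a row stride of 16384 * 16 = 262144 bytes, and computing y * stride at y = 16384 in 32-bit arithmetic yields exactly 2^32, which wraps to 0; carrying the stride as ptrdiff_t keeps that multiply in pointer-width math.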
- */ -void -brw_miptree_map(struct brw_context *brw, - struct brw_mipmap_tree *mt, - unsigned int level, - unsigned int slice, - unsigned int x, - unsigned int y, - unsigned int w, - unsigned int h, - GLbitfield mode, - void **out_ptr, - ptrdiff_t *out_stride) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_miptree_map *map; - - assert(mt->surf.samples == 1); - - map = brw_miptree_attach_map(mt, level, slice, x, y, w, h, mode); - if (!map){ - *out_ptr = NULL; - *out_stride = 0; - return; - } - - if (mt->format == MESA_FORMAT_S_UINT8) { - brw_miptree_map_s8(brw, mt, map, level, slice); - } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) { - brw_miptree_map_depthstencil(brw, mt, map, level, slice); - } else if (use_blitter_to_map(brw, mt, map)) { - brw_miptree_map_blit(brw, mt, map, level, slice); - } else if (mt->surf.tiling != ISL_TILING_LINEAR && devinfo->ver > 4) { - brw_miptree_map_tiled_memcpy(brw, mt, map, level, slice); -#if defined(USE_SSE41) - } else if (!(mode & GL_MAP_WRITE_BIT) && - !mt->compressed && cpu_has_sse4_1 && - (mt->surf.row_pitch_B % 16 == 0)) { - brw_miptree_map_movntdqa(brw, mt, map, level, slice); -#endif - } else { - if (mt->surf.tiling != ISL_TILING_LINEAR) - perf_debug("brw_miptree_map: mapping via gtt"); - brw_miptree_map_map(brw, mt, map, level, slice); - } - - *out_ptr = map->ptr; - *out_stride = map->stride; - - if (map->ptr == NULL) - brw_miptree_release_map(mt, level, slice); -} - -void -brw_miptree_unmap(struct brw_context *brw, - struct brw_mipmap_tree *mt, - unsigned int level, - unsigned int slice) -{ - struct brw_miptree_map *map = mt->level[level].slice[slice].map; - - assert(mt->surf.samples == 1); - - if (!map) - return; - - DBG("%s: mt %p (%s) level %d slice %d\n", __func__, - mt, _mesa_get_format_name(mt->format), level, slice); - - if (map->unmap) - map->unmap(brw, mt, map, level, slice); - - brw_miptree_release_map(mt, level, slice); -} - -enum isl_surf_dim -get_isl_surf_dim(GLenum target) -{ - switch (target) { - case GL_TEXTURE_1D: - case GL_TEXTURE_1D_ARRAY: - return ISL_SURF_DIM_1D; - - case GL_TEXTURE_2D: - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_RECTANGLE: - case GL_TEXTURE_CUBE_MAP: - case GL_TEXTURE_CUBE_MAP_ARRAY: - case GL_TEXTURE_2D_MULTISAMPLE: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - case GL_TEXTURE_EXTERNAL_OES: - return ISL_SURF_DIM_2D; - - case GL_TEXTURE_3D: - return ISL_SURF_DIM_3D; - } - - unreachable("Invalid texture target"); -} - -enum isl_dim_layout -get_isl_dim_layout(const struct intel_device_info *devinfo, - enum isl_tiling tiling, GLenum target) -{ - switch (target) { - case GL_TEXTURE_1D: - case GL_TEXTURE_1D_ARRAY: - return (devinfo->ver >= 9 && tiling == ISL_TILING_LINEAR ? - ISL_DIM_LAYOUT_GFX9_1D : ISL_DIM_LAYOUT_GFX4_2D); - - case GL_TEXTURE_2D: - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_RECTANGLE: - case GL_TEXTURE_2D_MULTISAMPLE: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - case GL_TEXTURE_EXTERNAL_OES: - return ISL_DIM_LAYOUT_GFX4_2D; - - case GL_TEXTURE_CUBE_MAP: - case GL_TEXTURE_CUBE_MAP_ARRAY: - return (devinfo->ver == 4 ? ISL_DIM_LAYOUT_GFX4_3D : - ISL_DIM_LAYOUT_GFX4_2D); - - case GL_TEXTURE_3D: - return (devinfo->ver >= 9 ? 
- ISL_DIM_LAYOUT_GFX4_2D : ISL_DIM_LAYOUT_GFX4_3D); - } - - unreachable("Invalid texture target"); -} - -bool -brw_miptree_set_clear_color(struct brw_context *brw, - struct brw_mipmap_tree *mt, - union isl_color_value clear_color) -{ - if (memcmp(&mt->fast_clear_color, &clear_color, sizeof(clear_color)) != 0) { - mt->fast_clear_color = clear_color; - if (mt->aux_buf->clear_color_bo) { - /* We can't update the clear color while the hardware is still using - * the previous one for a resolve or sampling from it. Make sure that - * there are no pending commands at this point. - */ - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL); - for (int i = 0; i < 4; i++) { - brw_store_data_imm32(brw, mt->aux_buf->clear_color_bo, - mt->aux_buf->clear_color_offset + i * 4, - mt->fast_clear_color.u32[i]); - } - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE); - } - brw->ctx.NewDriverState |= BRW_NEW_AUX_STATE; - return true; - } - return false; -} - -union isl_color_value -brw_miptree_get_clear_color(const struct brw_mipmap_tree *mt, - struct brw_bo **clear_color_bo, - uint64_t *clear_color_offset) -{ - assert(mt->aux_buf); - - *clear_color_bo = mt->aux_buf->clear_color_bo; - *clear_color_offset = mt->aux_buf->clear_color_offset; - return mt->fast_clear_color; -} - -static void -brw_miptree_update_etc_shadow(struct brw_context *brw, - struct brw_mipmap_tree *mt, - unsigned int level, - unsigned int slice, - int level_w, - int level_h) -{ - ptrdiff_t etc_stride, shadow_stride; - void *mptr, *sptr; - struct brw_mipmap_tree *smt = mt->shadow_mt; - - assert(brw_miptree_has_etc_shadow(brw, mt)); - - brw_miptree_map(brw, mt, level, slice, 0, 0, level_w, level_h, - GL_MAP_READ_BIT, &mptr, &etc_stride); - brw_miptree_map(brw, smt, level, slice, 0, 0, level_w, level_h, - GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT, - &sptr, &shadow_stride); - - if (mt->format == MESA_FORMAT_ETC1_RGB8) { - _mesa_etc1_unpack_rgba8888(sptr, shadow_stride, mptr, etc_stride, - level_w, level_h); - } else { - /* destination and source images must have the same swizzle */ - bool is_bgra = (smt->format == MESA_FORMAT_B8G8R8A8_SRGB); - _mesa_unpack_etc2_format(sptr, shadow_stride, mptr, etc_stride, - level_w, level_h, mt->format, is_bgra); - } - - brw_miptree_unmap(brw, mt, level, slice); - brw_miptree_unmap(brw, smt, level, slice); -} - -void -brw_miptree_update_etc_shadow_levels(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - struct brw_mipmap_tree *smt; - int num_slices; - - assert(mt); - assert(mt->surf.size_B > 0); - assert(brw_miptree_has_etc_shadow(brw, mt)); - - smt = mt->shadow_mt; - num_slices = smt->surf.logical_level0_px.array_len; - - for (int level = smt->first_level; level <= smt->last_level; level++) { - int level_w = minify(smt->surf.logical_level0_px.width, - level - smt->first_level); - int level_h = minify(smt->surf.logical_level0_px.height, - level - smt->first_level); - - for (unsigned int slice = 0; slice < num_slices; slice++) { - brw_miptree_update_etc_shadow(brw, mt, level, slice, level_w, - level_h); - } - } - - mt->shadow_needs_update = false; -} diff --git a/src/mesa/drivers/dri/i965/brw_mipmap_tree.h b/src/mesa/drivers/dri/i965/brw_mipmap_tree.h deleted file mode 100644 index 956163b..0000000 --- a/src/mesa/drivers/dri/i965/brw_mipmap_tree.h +++ /dev/null @@ -1,741 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** @file brw_mipmap_tree.h - * - * This file defines the structure that wraps a BO and describes how the - * mipmap levels and slices of a texture are laid out. - * - * The hardware has a fixed layout of a texture depending on parameters such - * as the target/type (2D, 3D, CUBE), width, height, pitch, and number of - * mipmap levels. The individual level/layer slices are each 2D rectangles of - * pixels at some x/y offset from the start of the brw_bo. - * - * Original OpenGL allowed texture miplevels to be specified in arbitrary - * order, and a texture may change size over time. Thus, each - * brw_texture_image has a reference to a miptree that contains the pixel - * data sized appropriately for it, which will later be referenced by/copied - * to the brw_texture_object at draw time (brw_finalize_mipmap_tree()) so - * that there's a single miptree for the complete texture. - */ - -#ifndef BRW_MIPMAP_TREE_H -#define BRW_MIPMAP_TREE_H - -#include <assert.h> - -#include "main/mtypes.h" -#include "isl/isl.h" -#include "blorp/blorp.h" -#include "brw_bufmgr.h" -#include "brw_context.h" -#include <GL/internal/dri_interface.h> - -#ifdef __cplusplus -extern "C" { -#endif - -struct brw_context; -struct brw_renderbuffer; - -struct brw_texture_image; - -/** - * This bit extends the set of GL_MAP_*_BIT enums. - * - * When calling brw_miptree_map() on an ETC-transcoded-to-RGB miptree or a - * depthstencil-split-to-separate-stencil miptree, we'll normally make a - * temporary and recreate the kind of data requested by Mesa core, since we're - * satisfying some glGetTexImage() request or something. - * - * However, occasionally you want to actually map the miptree's current data - * without transcoding back. This flag to brw_miptree_map() gets you that. - */ -#define BRW_MAP_DIRECT_BIT 0x80000000 - -struct brw_miptree_map { - /** Bitfield of GL_MAP_*_BIT and BRW_MAP_*_BIT. */ - GLbitfield mode; - /** Region of interest for the map. */ - int x, y, w, h; - /** Possibly malloced temporary buffer for the mapping. */ - void *buffer; - /** Possible pointer to a temporary linear miptree for the mapping. */ - struct brw_mipmap_tree *linear_mt; - /** Pointer to the start of (map_x, map_y) returned by the mapping. */ - void *ptr; - /** Stride of the mapping.
*/ - int stride; - - void (*unmap)(struct brw_context *brw, - struct brw_mipmap_tree *mt, - struct brw_miptree_map *map, - unsigned int level, - unsigned int slice); -}; - -/** - * Describes the location of each texture image within a miptree. - */ -struct brw_mipmap_level -{ - /** Offset to this miptree level, used in computing x_offset. */ - GLuint level_x; - /** Offset to this miptree level, used in computing y_offset. */ - GLuint level_y; - - /** - * \brief Is HiZ enabled for this level? - * - * If \c mt->level[l].has_hiz is set, then (1) \c mt->hiz_mt has been - * allocated and (2) the HiZ memory for the slices in this level reside at - * \c mt->hiz_mt->level[l]. - */ - bool has_hiz; - - /** - * \brief List of 2D images in this mipmap level. - * - * This may be a list of cube faces, array slices in 2D array texture, or - * layers in a 3D texture. The list's length is \c depth. - */ - struct brw_mipmap_slice { - /** - * Mapping information. Persistent for the duration of - * brw_miptree_map/unmap on this slice. - */ - struct brw_miptree_map *map; - } *slice; -}; - -/** - * Miptree aux buffer. These buffers are associated with a miptree, but the - * format is managed by the hardware. - * - * For Gfx7+, we always give the hardware the start of the buffer, and let it - * handle all accesses to the buffer. Therefore we don't need the full miptree - * layout structure for this buffer. - */ -struct brw_miptree_aux_buffer -{ - struct isl_surf surf; - - /** - * Buffer object containing the pixel data. - * - * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress - * @see 3DSTATE_HIER_DEPTH_BUFFER.AuxiliarySurfaceBaseAddress - */ - struct brw_bo *bo; - - /** - * Offset into bo where the surface starts. - * - * @see brw_mipmap_aux_buffer::bo - * - * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress - * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress - * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress - * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress - */ - uint32_t offset; - - /** - * Buffer object containing the indirect clear color. - * - * @see create_ccs_buf_for_image - * @see RENDER_SURFACE_STATE.ClearValueAddress - */ - struct brw_bo *clear_color_bo; - - /** - * Offset into bo where the clear color can be found. - * - * @see create_ccs_buf_for_image - * @see RENDER_SURFACE_STATE.ClearValueAddress - */ - uint32_t clear_color_offset; -}; - -struct brw_mipmap_tree -{ - struct isl_surf surf; - - /** - * Buffer object containing the surface. - * - * @see brw_mipmap_tree::offset - * @see RENDER_SURFACE_STATE.SurfaceBaseAddress - * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress - * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress - * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress - * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress - */ - struct brw_bo *bo; - - /** - * @brief One of GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY, etc. - * - * @see RENDER_SURFACE_STATE.SurfaceType - * @see RENDER_SURFACE_STATE.SurfaceArray - * @see 3DSTATE_DEPTH_BUFFER.SurfaceType - */ - GLenum target; - - /** - * Generally, this is just the same as the gl_texture_image->TexFormat or - * gl_renderbuffer->Format. - * - * However, for textures and renderbuffers with packed depth/stencil formats - * on hardware where we want or need to use separate stencil, there will be - * two miptrees for storing the data. 
If the depthstencil texture or rb is - * MESA_FORMAT_Z32_FLOAT_S8X24_UINT, then mt->format will be - * MESA_FORMAT_Z_FLOAT32, otherwise for MESA_FORMAT_Z24_UNORM_S8_UINT objects it will be - * MESA_FORMAT_Z24_UNORM_X8_UINT. - * - * @see RENDER_SURFACE_STATE.SurfaceFormat - * @see 3DSTATE_DEPTH_BUFFER.SurfaceFormat - */ - mesa_format format; - - GLuint first_level; - GLuint last_level; - - /** Bytes per pixel (or bytes per block if compressed) */ - GLuint cpp; - - bool compressed; - - /* Includes image offset tables: */ - struct brw_mipmap_level level[MAX_TEXTURE_LEVELS]; - - /** - * Offset into bo where the surface starts. - * - * @see brw_mipmap_tree::bo - * - * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress - * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress - * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress - * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress - */ - uint32_t offset; - - /** - * \brief The type of auxiliary compression used by this miptree. - * - * This describes the type of auxiliary compression that is intended to be - * used by this miptree. An aux usage of ISL_AUX_USAGE_NONE means that - * auxiliary compression is permanently disabled. An aux usage other than - * ISL_AUX_USAGE_NONE does not imply that the auxiliary buffer has actually - * been allocated nor does it imply that auxiliary compression will always - * be enabled for this surface. For instance, with CCS_D, we may allocate - * the CCS on-the-fly and it may not be used for texturing if the miptree - * is fully resolved. - */ - enum isl_aux_usage aux_usage; - - /** - * \brief Whether or not this miptree supports fast clears. - */ - bool supports_fast_clear; - - /** - * \brief Maps miptree slices to their current aux state - * - * This two-dimensional array is indexed as [level][layer] and stores an - * aux state for each slice. - */ - enum isl_aux_state **aux_state; - - /** - * \brief Stencil miptree for depthstencil textures. - * - * This miptree is used for depthstencil textures and renderbuffers that - * require separate stencil. It always has the true copy of the stencil - * bits, regardless of mt->format. - * - * \see 3DSTATE_STENCIL_BUFFER - * \see brw_miptree_map_depthstencil() - * \see brw_miptree_unmap_depthstencil() - */ - struct brw_mipmap_tree *stencil_mt; - - /** - * \brief Shadow miptree for sampling when the main isn't supported by HW. - * - * To workaround various sampler bugs and limitations, we blit the main - * texture into a new texture that can be sampled. - * - * This miptree may be used for: - * - Stencil texturing (pre-BDW) as required by GL_ARB_stencil_texturing. - * - To store the decompressed ETC/EAC data in case we emulate the ETC - * compression on Gen 7 or earlier GPUs. - */ - struct brw_mipmap_tree *shadow_mt; - bool shadow_needs_update; - - /** - * \brief CCS, MCS, or HiZ auxiliary buffer. - * - * NULL if no auxiliary buffer is in use for this surface. - * - * For single-sampled color miptrees: - * This buffer contains the Color Control Surface, which stores the - * necessary information to implement lossless color compression (CCS_E) - * and "fast color clear" (CCS_D) behaviour. - * - * For multi-sampled color miptrees: - * This buffer contains the Multisample Control Surface, which stores the - * necessary information to implement compressed MSAA - * (INTEL_MSAA_FORMAT_CMS). 
- * - * For depth miptrees: - * This buffer contains the Hierarchical Depth Buffer, which stores the - * necessary information to implement lossless depth compression and fast - * depth clear behavior. - * - * To determine if HiZ is enabled, do not check this pointer. Instead, - * use brw_miptree_level_has_hiz(). - */ - struct brw_miptree_aux_buffer *aux_buf; - - /** - * Planes 1 and 2 in case this is a planar surface. - */ - struct brw_mipmap_tree *plane[2]; - - /** - * Fast clear color for this surface. For depth surfaces, the clear value - * is stored as a float32 in the red component. - */ - union isl_color_value fast_clear_color; - - /** - * For external surfaces, this is DRM format modifier that was used to - * create or import the surface. For internal surfaces, this will always - * be DRM_FORMAT_MOD_INVALID. - */ - uint64_t drm_modifier; - - /* These are also refcounted: - */ - GLuint refcount; -}; - -bool -brw_miptree_alloc_aux(struct brw_context *brw, - struct brw_mipmap_tree *mt); - -enum brw_miptree_create_flags { - /** No miptree create flags */ - MIPTREE_CREATE_DEFAULT = 0, - - /** Miptree creation should try to allocate a currently busy BO - * - * This may be advantageous if we know the next thing to touch the BO will - * be the GPU because the BO will likely already be in the GTT and maybe - * even in some caches. If there is a chance that the next thing to touch - * the miptree BO will be the CPU, this flag should not be set. - */ - MIPTREE_CREATE_BUSY = 1 << 0, - - /** Create the miptree with auxiliary compression disabled - * - * This does not prevent the caller of brw_miptree_create from coming - * along later and turning auxiliary compression back on but it does mean - * that the miptree will be created with mt->aux_usage == NONE. - */ - MIPTREE_CREATE_NO_AUX = 1 << 1, -}; - -struct brw_mipmap_tree *brw_miptree_create(struct brw_context *brw, - GLenum target, - mesa_format format, - GLuint first_level, - GLuint last_level, - GLuint width0, - GLuint height0, - GLuint depth0, - GLuint num_samples, - enum brw_miptree_create_flags flags); - -struct brw_mipmap_tree * -brw_miptree_create_for_bo(struct brw_context *brw, - struct brw_bo *bo, - mesa_format format, - uint32_t offset, - uint32_t width, - uint32_t height, - uint32_t depth, - int pitch, - enum isl_tiling tiling, - enum brw_miptree_create_flags flags); - -struct brw_mipmap_tree * -brw_miptree_create_for_dri_image(struct brw_context *brw, - __DRIimage *image, - GLenum target, - mesa_format format, - bool allow_internal_aux); - -bool -brw_update_winsys_renderbuffer_miptree(struct brw_context *intel, - struct brw_renderbuffer *irb, - struct brw_mipmap_tree *singlesample_mt, - uint32_t width, uint32_t height, - uint32_t pitch); - -/** - * Create a miptree appropriate as the storage for a non-texture renderbuffer. - * The miptree has the following properties: - * - The target is GL_TEXTURE_2D. - * - There are no levels other than the base level 0. - * - Depth is 1. - */ -struct brw_mipmap_tree* -brw_miptree_create_for_renderbuffer(struct brw_context *brw, - mesa_format format, - uint32_t width, - uint32_t height, - uint32_t num_samples); - -mesa_format -brw_depth_format_for_depthstencil_format(mesa_format format); - -mesa_format -brw_lower_compressed_format(struct brw_context *brw, mesa_format format); - -unsigned -brw_get_num_logical_layers(const struct brw_mipmap_tree *mt, unsigned level); - -/** \brief Assert that the level and layer are valid for the miptree. 
*/
-void
-brw_miptree_check_level_layer(const struct brw_mipmap_tree *mt,
-                              uint32_t level,
-                              uint32_t layer);
-
-void brw_miptree_reference(struct brw_mipmap_tree **dst,
-                           struct brw_mipmap_tree *src);
-
-void brw_miptree_release(struct brw_mipmap_tree **mt);
-
-/* Check if an image fits an existing mipmap tree layout
- */
-bool brw_miptree_match_image(struct brw_mipmap_tree *mt,
-                             struct gl_texture_image *image);
-
-void
-brw_miptree_get_image_offset(const struct brw_mipmap_tree *mt,
-                             GLuint level, GLuint slice,
-                             GLuint *x, GLuint *y);
-
-enum isl_surf_dim
-get_isl_surf_dim(GLenum target);
-
-enum isl_dim_layout
-get_isl_dim_layout(const struct intel_device_info *devinfo,
-                   enum isl_tiling tiling, GLenum target);
-
-void
-brw_get_image_dims(struct gl_texture_image *image,
-                   int *width, int *height, int *depth);
-
-uint32_t
-brw_miptree_get_tile_offsets(const struct brw_mipmap_tree *mt,
-                             GLuint level, GLuint slice,
-                             uint32_t *tile_x,
-                             uint32_t *tile_y);
-uint32_t
-brw_miptree_get_aligned_offset(const struct brw_mipmap_tree *mt,
-                               uint32_t x, uint32_t y);
-
-void
-brw_miptree_copy_slice(struct brw_context *brw,
-                       struct brw_mipmap_tree *src_mt,
-                       unsigned src_level, unsigned src_layer,
-                       struct brw_mipmap_tree *dst_mt,
-                       unsigned dst_level, unsigned dst_layer);
-
-void
-brw_miptree_copy_teximage(struct brw_context *brw,
-                          struct brw_texture_image *brw_image,
-                          struct brw_mipmap_tree *dst_mt);
-
-/**
- * \name Miptree HiZ functions
- * \{
- *
- * It is safe to call the "slice_set_need_resolve" and "slice_resolve"
- * functions on a miptree without HiZ. In that case, each function is a no-op.
- */
-
-bool
-brw_miptree_level_has_hiz(const struct brw_mipmap_tree *mt, uint32_t level);
-
-/**\}*/
-
-bool
-brw_miptree_has_color_unresolved(const struct brw_mipmap_tree *mt,
-                                 unsigned start_level, unsigned num_levels,
-                                 unsigned start_layer, unsigned num_layers);
-
-
-#define INTEL_REMAINING_LAYERS UINT32_MAX
-#define INTEL_REMAINING_LEVELS UINT32_MAX
-
-/** Prepare a miptree for access
- *
- * This function should be called prior to any access to a miptree in order
- * to perform any needed resolves.
- *
- * \param[in] start_level The first mip level to be accessed
- *
- * \param[in] num_levels The number of miplevels to be accessed or
- *                       INTEL_REMAINING_LEVELS to indicate every level
- *                       above start_level will be accessed
- *
- * \param[in] start_layer The first array slice or 3D layer to be accessed
- *
- * \param[in] num_layers The number of array slices or 3D layers to be
- *                       accessed or INTEL_REMAINING_LAYERS to indicate
- *                       every layer above start_layer will be accessed
- *
- * \param[in] aux_usage The auxiliary compression usage the access will be
- *                      made with; this must be ISL_AUX_USAGE_NONE for
- *                      uncompressed miptrees
- *
- * \param[in] fast_clear_supported Whether or not the access will support
- *                                 fast clears in the miptree's auxiliary
- *                                 compression format
- */
-void
-brw_miptree_prepare_access(struct brw_context *brw,
-                           struct brw_mipmap_tree *mt,
-                           uint32_t start_level, uint32_t num_levels,
-                           uint32_t start_layer, uint32_t num_layers,
-                           enum isl_aux_usage aux_usage,
-                           bool fast_clear_supported);
-
-/** Complete a write operation
- *
- * This function should be called after any operation that writes to a
- * miptree. This will update the miptree's compression state so that future
- * resolves happen correctly.
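- *
- * A minimal usage sketch of the prepare/finish pair (hypothetical caller;
- * the level/layer variables are illustrative only):
- *
- *    brw_miptree_prepare_access(brw, mt, level, 1, layer, 1,
- *                               mt->aux_usage, mt->supports_fast_clear);
- *    ... emit GPU commands that write the slice ...
- *    brw_miptree_finish_write(brw, mt, level, layer, 1, mt->aux_usage);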
- *
- * Technically, this function can be called before the write occurs, but the
- * caller must then ensure that they don't interleave
- * brw_miptree_prepare_access and brw_miptree_finish_write calls on
- * overlapping layer/level ranges.
- *
- * \param[in] level The mip level that was written
- *
- * \param[in] start_layer The first array slice or 3D layer written
- *
- * \param[in] num_layers The number of array slices or 3D layers
- *                       written or INTEL_REMAINING_LAYERS to indicate
- *                       every layer above start_layer was written
- *
- * \param[in] aux_usage The auxiliary compression usage the write was
- *                      performed with
- */
-void
-brw_miptree_finish_write(struct brw_context *brw,
-                         struct brw_mipmap_tree *mt, uint32_t level,
-                         uint32_t start_layer, uint32_t num_layers,
-                         enum isl_aux_usage aux_usage);
-
-/** Get the auxiliary compression state of a miptree slice */
-enum isl_aux_state
-brw_miptree_get_aux_state(const struct brw_mipmap_tree *mt,
-                          uint32_t level, uint32_t layer);
-
-/** Set the auxiliary compression state of a miptree slice range
- *
- * This function directly sets the auxiliary compression state of a slice
- * range of a miptree. It only modifies data structures and does not do any
- * resolves. This should only be called by code which directly performs
- * compression operations such as fast clears and resolves. Most code should
- * use brw_miptree_prepare_access or brw_miptree_finish_write.
- */
-void
-brw_miptree_set_aux_state(struct brw_context *brw,
-                          struct brw_mipmap_tree *mt, uint32_t level,
-                          uint32_t start_layer, uint32_t num_layers,
-                          enum isl_aux_state aux_state);
-
-/**
- * Prepare a miptree for raw access
- *
- * This helper prepares the miptree for access that knows nothing about any
- * sort of compression whatsoever. This is useful when mapping the surface or
- * using it with the blitter.
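- *
- * For example, a hypothetical caller mapping a slice for CPU access might
- * do (names illustrative only):
- *
- *    brw_miptree_access_raw(brw, mt, level, slice, write);
- *    ... map mt->bo and read/write the pixels directly ...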
- */ -static inline void -brw_miptree_access_raw(struct brw_context *brw, - struct brw_mipmap_tree *mt, - uint32_t level, uint32_t layer, - bool write) -{ - brw_miptree_prepare_access(brw, mt, level, 1, layer, 1, - ISL_AUX_USAGE_NONE, false); - if (write) - brw_miptree_finish_write(brw, mt, level, layer, 1, ISL_AUX_USAGE_NONE); -} - -enum isl_aux_usage -brw_miptree_texture_aux_usage(struct brw_context *brw, - struct brw_mipmap_tree *mt, - enum isl_format view_format, - enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits); -void -brw_miptree_prepare_texture(struct brw_context *brw, - struct brw_mipmap_tree *mt, - enum isl_format view_format, - uint32_t start_level, uint32_t num_levels, - uint32_t start_layer, uint32_t num_layers, - enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits); -void -brw_miptree_prepare_image(struct brw_context *brw, - struct brw_mipmap_tree *mt); - -enum isl_aux_usage -brw_miptree_render_aux_usage(struct brw_context *brw, - struct brw_mipmap_tree *mt, - enum isl_format render_format, - bool blend_enabled, - bool draw_aux_disabled); -void -brw_miptree_prepare_render(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t layer_count, - enum isl_aux_usage aux_usage); -void -brw_miptree_finish_render(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t layer_count, - enum isl_aux_usage aux_usage); -void -brw_miptree_prepare_depth(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t layer_count); -void -brw_miptree_finish_depth(struct brw_context *brw, - struct brw_mipmap_tree *mt, uint32_t level, - uint32_t start_layer, uint32_t layer_count, - bool depth_written); -void -brw_miptree_prepare_external(struct brw_context *brw, - struct brw_mipmap_tree *mt); -void -brw_miptree_finish_external(struct brw_context *brw, - struct brw_mipmap_tree *mt); - -void -brw_miptree_make_shareable(struct brw_context *brw, - struct brw_mipmap_tree *mt); - -void -brw_miptree_updownsample(struct brw_context *brw, - struct brw_mipmap_tree *src, - struct brw_mipmap_tree *dst); - -void -brw_update_r8stencil(struct brw_context *brw, - struct brw_mipmap_tree *mt); - -void -brw_miptree_map(struct brw_context *brw, - struct brw_mipmap_tree *mt, - unsigned int level, - unsigned int slice, - unsigned int x, - unsigned int y, - unsigned int w, - unsigned int h, - GLbitfield mode, - void **out_ptr, - ptrdiff_t *out_stride); - -void -brw_miptree_unmap(struct brw_context *brw, - struct brw_mipmap_tree *mt, - unsigned int level, - unsigned int slice); - -bool -brw_miptree_sample_with_hiz(struct brw_context *brw, - struct brw_mipmap_tree *mt); - -bool -brw_miptree_set_clear_color(struct brw_context *brw, - struct brw_mipmap_tree *mt, - union isl_color_value clear_color); - -/* Get a clear color suitable for filling out an ISL surface state. 
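- * (On hardware that reads the clear color indirectly, through
- * RENDER_SURFACE_STATE.ClearValueAddress, the returned bo/offset pair
- * names the buffer holding the value; see the clear_color_bo field of
- * brw_miptree_aux_buffer above.)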
*/ -union isl_color_value -brw_miptree_get_clear_color(const struct brw_mipmap_tree *mt, - struct brw_bo **clear_color_bo, - uint64_t *clear_color_offset); - - -static inline int -brw_miptree_blt_pitch(struct brw_mipmap_tree *mt) -{ - int pitch = mt->surf.row_pitch_B; - if (mt->surf.tiling != ISL_TILING_LINEAR) - pitch /= 4; - return pitch; -} - -isl_memcpy_type -brw_miptree_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type, - uint32_t *cpp); - -static inline bool -brw_miptree_needs_fake_etc(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - bool is_etc = _mesa_is_format_etc2(mt->format) || - (mt->format == MESA_FORMAT_ETC1_RGB8); - - return devinfo->ver < 8 && devinfo->platform != INTEL_PLATFORM_BYT && is_etc; -} - -static inline bool -brw_miptree_has_etc_shadow(struct brw_context *brw, - struct brw_mipmap_tree *mt) -{ - return brw_miptree_needs_fake_etc(brw, mt) && mt->shadow_mt; -} - -void -brw_miptree_update_etc_shadow_levels(struct brw_context *brw, - struct brw_mipmap_tree *mt); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c deleted file mode 100644 index 00aa82f..0000000 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ /dev/null @@ -1,728 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - - -#include "brw_batch.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "compiler/brw_eu_defines.h" - -#include "main/framebuffer.h" -#include "main/fbobject.h" -#include "main/format_utils.h" -#include "main/glformats.h" - -/** - * Upload pointers to the per-stage state. - * - * The state pointers in this packet are all relative to the general state - * base address set by CMD_STATE_BASE_ADDRESS, which is 0. - */ -static void -upload_pipelined_state_pointers(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->ver == 5) { - /* Need to flush before changing clip max threads for errata. 
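- * (Batch emission pattern used throughout this file: BEGIN_BATCH(n)
- * reserves n dwords in the batch buffer, OUT_BATCH/OUT_RELOC write them,
- * and ADVANCE_BATCH() verifies, in debug builds, that exactly n dwords
- * were emitted.)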
*/ - BEGIN_BATCH(1); - OUT_BATCH(MI_FLUSH); - ADVANCE_BATCH(); - } - - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2)); - OUT_RELOC(brw->batch.state.bo, 0, brw->vs.base.state_offset); - if (brw->ff_gs.prog_active) - OUT_RELOC(brw->batch.state.bo, 0, brw->ff_gs.state_offset | 1); - else - OUT_BATCH(0); - OUT_RELOC(brw->batch.state.bo, 0, brw->clip.state_offset | 1); - OUT_RELOC(brw->batch.state.bo, 0, brw->sf.state_offset); - OUT_RELOC(brw->batch.state.bo, 0, brw->wm.base.state_offset); - OUT_RELOC(brw->batch.state.bo, 0, brw->cc.state_offset); - ADVANCE_BATCH(); - - brw->ctx.NewDriverState |= BRW_NEW_PSP; -} - -static void -upload_psp_urb_cbs(struct brw_context *brw) -{ - upload_pipelined_state_pointers(brw); - brw_upload_urb_fence(brw); - brw_upload_cs_urb_state(brw); -} - -const struct brw_tracked_state brw_psp_urb_cbs = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_FF_GS_PROG_DATA | - BRW_NEW_GFX4_UNIT_STATE | - BRW_NEW_STATE_BASE_ADDRESS | - BRW_NEW_URB_FENCE, - }, - .emit = upload_psp_urb_cbs, -}; - -uint32_t -brw_depthbuffer_format(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - struct gl_framebuffer *fb = ctx->DrawBuffer; - struct brw_renderbuffer *drb = brw_get_renderbuffer(fb, BUFFER_DEPTH); - struct brw_renderbuffer *srb; - - if (!drb && - (srb = brw_get_renderbuffer(fb, BUFFER_STENCIL)) && - !srb->mt->stencil_mt && - (brw_rb_format(srb) == MESA_FORMAT_Z24_UNORM_S8_UINT || - brw_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_S8X24_UINT)) { - drb = srb; - } - - if (!drb) - return BRW_DEPTHFORMAT_D32_FLOAT; - - return brw_depth_format(brw, drb->mt->format); -} - -static struct brw_mipmap_tree * -get_stencil_miptree(struct brw_renderbuffer *irb) -{ - if (!irb) - return NULL; - if (irb->mt->stencil_mt) - return irb->mt->stencil_mt; - return brw_renderbuffer_get_mt(irb); -} - -static bool -rebase_depth_stencil(struct brw_context *brw, struct brw_renderbuffer *irb, - bool invalidate) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - uint32_t tile_mask_x = 0, tile_mask_y = 0; - - isl_get_tile_masks(irb->mt->surf.tiling, irb->mt->cpp, - &tile_mask_x, &tile_mask_y); - assert(!brw_miptree_level_has_hiz(irb->mt, irb->mt_level)); - - uint32_t tile_x = irb->draw_x & tile_mask_x; - uint32_t tile_y = irb->draw_y & tile_mask_y; - - /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 - * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth - * Coordinate Offset X/Y": - * - * "The 3 LSBs of both offsets must be zero to ensure correct - * alignment" - */ - bool rebase = tile_x & 7 || tile_y & 7; - - /* We didn't even have intra-tile offsets before g45. */ - rebase |= (!devinfo->has_surface_tile_offset && (tile_x || tile_y)); - - if (rebase) { - perf_debug("HW workaround: blitting depth level %d to a temporary " - "to fix alignment (depth tile offset %d,%d)\n", - irb->mt_level, tile_x, tile_y); - brw_renderbuffer_move_to_temp(brw, irb, invalidate); - - /* There is now only single slice miptree. */ - brw->depthstencil.tile_x = 0; - brw->depthstencil.tile_y = 0; - brw->depthstencil.depth_offset = 0; - return true; - } - - /* While we just tried to get everything aligned, we may have failed to do - * so in the case of rendering to array or 3D textures, where nonzero faces - * will still have an offset post-rebase. At least give an informative - * warning. 
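- *
- * Worked example (hypothetical numbers): with X tiling and cpp=4 a tile
- * is 128x8 pixels, so tile_mask_x is 127 and tile_mask_y is 7. A draw_x
- * of 300 gives tile_x = 300 & 127 = 44, and 44 & 7 = 4 violates the
- * "3 LSBs must be zero" rule quoted above, so on this fallback path the
- * offsets get truncated below.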
- */ - WARN_ONCE((tile_x & 7) || (tile_y & 7), - "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n" - "Truncating offset (%u:%u), bad rendering may occur.\n", - tile_x, tile_y); - tile_x &= ~7; - tile_y &= ~7; - - brw->depthstencil.tile_x = tile_x; - brw->depthstencil.tile_y = tile_y; - brw->depthstencil.depth_offset = brw_miptree_get_aligned_offset( - irb->mt, - irb->draw_x & ~tile_mask_x, - irb->draw_y & ~tile_mask_y); - - return false; -} - -void -brw_workaround_depthstencil_alignment(struct brw_context *brw, - GLbitfield clear_mask) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - struct gl_framebuffer *fb = ctx->DrawBuffer; - struct brw_renderbuffer *depth_irb = brw_get_renderbuffer(fb, BUFFER_DEPTH); - struct brw_renderbuffer *stencil_irb = brw_get_renderbuffer(fb, BUFFER_STENCIL); - struct brw_mipmap_tree *depth_mt = NULL; - bool invalidate_depth = clear_mask & BUFFER_BIT_DEPTH; - bool invalidate_stencil = clear_mask & BUFFER_BIT_STENCIL; - - if (depth_irb) - depth_mt = depth_irb->mt; - - /* Initialize brw->depthstencil to 'nop' workaround state. - */ - brw->depthstencil.tile_x = 0; - brw->depthstencil.tile_y = 0; - brw->depthstencil.depth_offset = 0; - - /* Gfx6+ doesn't require the workarounds, since we always program the - * surface state at the start of the whole surface. - */ - if (devinfo->ver >= 6) - return; - - /* Check if depth buffer is in depth/stencil format. If so, then it's only - * safe to invalidate it if we're also clearing stencil. - */ - if (depth_irb && invalidate_depth && - _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL) - invalidate_depth = invalidate_stencil && stencil_irb; - - if (depth_irb) { - if (rebase_depth_stencil(brw, depth_irb, invalidate_depth)) { - /* In the case of stencil_irb being the same packed depth/stencil - * texture but not the same rb, make it point at our rebased mt, too. - */ - if (stencil_irb && - stencil_irb != depth_irb && - stencil_irb->mt == depth_mt) { - brw_miptree_reference(&stencil_irb->mt, depth_irb->mt); - brw_renderbuffer_set_draw_offset(stencil_irb); - } - } - - if (stencil_irb) { - assert(stencil_irb->mt == depth_irb->mt); - assert(stencil_irb->mt_level == depth_irb->mt_level); - assert(stencil_irb->mt_layer == depth_irb->mt_layer); - } - } - - /* If there is no depth attachment, consider if stencil needs rebase. */ - if (!depth_irb && stencil_irb) - rebase_depth_stencil(brw, stencil_irb, invalidate_stencil); -} - -static void -brw_emit_depth_stencil_hiz(struct brw_context *brw, - struct brw_renderbuffer *depth_irb, - struct brw_mipmap_tree *depth_mt, - struct brw_renderbuffer *stencil_irb, - struct brw_mipmap_tree *stencil_mt) -{ - uint32_t tile_x = brw->depthstencil.tile_x; - uint32_t tile_y = brw->depthstencil.tile_y; - uint32_t depth_surface_type = BRW_SURFACE_NULL; - uint32_t depthbuffer_format = BRW_DEPTHFORMAT_D32_FLOAT; - uint32_t depth_offset = 0; - uint32_t width = 1, height = 1; - bool tiled_surface = true; - - /* If there's a packed depth/stencil bound to stencil only, we need to - * emit the packed depth/stencil buffer packet. 
- */ - if (!depth_irb && stencil_irb) { - depth_irb = stencil_irb; - depth_mt = stencil_mt; - } - - if (depth_irb && depth_mt) { - depthbuffer_format = brw_depthbuffer_format(brw); - depth_surface_type = BRW_SURFACE_2D; - depth_offset = brw->depthstencil.depth_offset; - width = depth_irb->Base.Base.Width; - height = depth_irb->Base.Base.Height; - tiled_surface = depth_mt->surf.tiling != ISL_TILING_LINEAR; - } - - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const unsigned len = (devinfo->verx10 == 45 || devinfo->ver == 5) ? 6 : 5; - - BEGIN_BATCH(len); - OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2)); - OUT_BATCH((depth_mt ? depth_mt->surf.row_pitch_B - 1 : 0) | - (depthbuffer_format << 18) | - (BRW_TILEWALK_YMAJOR << 26) | - (tiled_surface << 27) | - (depth_surface_type << 29)); - - if (depth_mt) { - OUT_RELOC(depth_mt->bo, RELOC_WRITE, depth_offset); - } else { - OUT_BATCH(0); - } - - OUT_BATCH(((width + tile_x - 1) << 6) | - ((height + tile_y - 1) << 19)); - OUT_BATCH(0); - - if (devinfo->verx10 >= 45) - OUT_BATCH(tile_x | (tile_y << 16)); - else - assert(tile_x == 0 && tile_y == 0); - - if (devinfo->ver >= 6) - OUT_BATCH(0); - - ADVANCE_BATCH(); -} - -void -brw_emit_depthbuffer(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - struct gl_framebuffer *fb = ctx->DrawBuffer; - /* _NEW_BUFFERS */ - struct brw_renderbuffer *depth_irb = brw_get_renderbuffer(fb, BUFFER_DEPTH); - struct brw_renderbuffer *stencil_irb = brw_get_renderbuffer(fb, BUFFER_STENCIL); - struct brw_mipmap_tree *depth_mt = brw_renderbuffer_get_mt(depth_irb); - struct brw_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb); - - if (depth_mt) - brw_cache_flush_for_depth(brw, depth_mt->bo); - if (stencil_mt) - brw_cache_flush_for_depth(brw, stencil_mt->bo); - - if (devinfo->ver < 6) { - brw_emit_depth_stencil_hiz(brw, depth_irb, depth_mt, - stencil_irb, stencil_mt); - return; - } - - /* Skip repeated NULL depth/stencil emits (think 2D rendering). */ - if (!depth_mt && !stencil_mt && brw->no_depth_or_stencil) { - assert(brw->hw_ctx); - return; - } - - brw_emit_depth_stall_flushes(brw); - - const unsigned ds_dwords = brw->isl_dev.ds.size / 4; - brw_batch_begin(brw, ds_dwords); - uint32_t *ds_map = brw->batch.map_next; - const uint32_t ds_offset = (char *)ds_map - (char *)brw->batch.batch.map; - - struct isl_view view = { - /* Some nice defaults */ - .base_level = 0, - .levels = 1, - .base_array_layer = 0, - .array_len = 1, - .swizzle = ISL_SWIZZLE_IDENTITY, - }; - - struct isl_depth_stencil_hiz_emit_info info = { - .view = &view, - .mocs = brw_mocs(&brw->isl_dev, NULL), - }; - - if (depth_mt) { - view.usage |= ISL_SURF_USAGE_DEPTH_BIT; - info.depth_surf = &depth_mt->surf; - - info.depth_address = - brw_batch_reloc(&brw->batch, - ds_offset + brw->isl_dev.ds.depth_offset, - depth_mt->bo, depth_mt->offset, RELOC_WRITE); - - info.mocs = brw_mocs(&brw->isl_dev, depth_mt->bo); - view.base_level = depth_irb->mt_level - depth_irb->mt->first_level; - view.base_array_layer = depth_irb->mt_layer; - view.array_len = MAX2(depth_irb->layer_count, 1); - view.format = depth_mt->surf.format; - - info.hiz_usage = depth_mt->aux_usage; - if (!brw_renderbuffer_has_hiz(depth_irb)) { - /* Just because a miptree has ISL_AUX_USAGE_HIZ does not mean that - * all miplevels of that miptree are guaranteed to support HiZ. See - * brw_miptree_level_enable_hiz for details. 
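- * (HiZ support is tracked per level in mt->level[].has_hiz, which is
- * what brw_renderbuffer_has_hiz() consults for the bound level, so
- * mt->aux_usage alone is not a sufficient check here.)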
- */ - info.hiz_usage = ISL_AUX_USAGE_NONE; - } - - if (info.hiz_usage == ISL_AUX_USAGE_HIZ) { - info.hiz_surf = &depth_mt->aux_buf->surf; - - uint64_t hiz_offset = 0; - if (devinfo->ver == 6) { - /* HiZ surfaces on Sandy Bridge technically don't support - * mip-mapping. However, we can fake it by offsetting to the - * first slice of LOD0 in the HiZ surface. - */ - isl_surf_get_image_offset_B_tile_sa(&depth_mt->aux_buf->surf, - view.base_level, 0, 0, - &hiz_offset, NULL, NULL); - } - - info.hiz_address = - brw_batch_reloc(&brw->batch, - ds_offset + brw->isl_dev.ds.hiz_offset, - depth_mt->aux_buf->bo, - depth_mt->aux_buf->offset + hiz_offset, - RELOC_WRITE); - } - - info.depth_clear_value = depth_mt->fast_clear_color.f32[0]; - } - - if (stencil_mt) { - view.usage |= ISL_SURF_USAGE_STENCIL_BIT; - info.stencil_surf = &stencil_mt->surf; - - if (!depth_mt) { - info.mocs = brw_mocs(&brw->isl_dev, stencil_mt->bo); - view.base_level = stencil_irb->mt_level - stencil_irb->mt->first_level; - view.base_array_layer = stencil_irb->mt_layer; - view.array_len = MAX2(stencil_irb->layer_count, 1); - view.format = stencil_mt->surf.format; - } - - uint64_t stencil_offset = 0; - if (devinfo->ver == 6) { - /* Stencil surfaces on Sandy Bridge technically don't support - * mip-mapping. However, we can fake it by offsetting to the - * first slice of LOD0 in the stencil surface. - */ - isl_surf_get_image_offset_B_tile_sa(&stencil_mt->surf, - view.base_level, 0, 0, - &stencil_offset, NULL, NULL); - } - - info.stencil_address = - brw_batch_reloc(&brw->batch, - ds_offset + brw->isl_dev.ds.stencil_offset, - stencil_mt->bo, - stencil_mt->offset + stencil_offset, - RELOC_WRITE); - } - - isl_emit_depth_stencil_hiz_s(&brw->isl_dev, ds_map, &info); - - brw->batch.map_next += ds_dwords; - brw_batch_advance(brw); - - brw->no_depth_or_stencil = !depth_mt && !stencil_mt; -} - -const struct brw_tracked_state brw_depthbuffer = { - .dirty = { - .mesa = _NEW_BUFFERS, - .brw = BRW_NEW_AUX_STATE | - BRW_NEW_BATCH | - BRW_NEW_BLORP, - }, - .emit = brw_emit_depthbuffer, -}; - -void -brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const bool is_965 = devinfo->verx10 == 40; - const uint32_t _3DSTATE_PIPELINE_SELECT = - is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45; - - if (devinfo->ver >= 8 && devinfo->ver < 10) { - /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: - * - * Software must clear the COLOR_CALC_STATE Valid field in - * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT - * with Pipeline Select set to GPGPU. - * - * The internal hardware docs recommend the same workaround for Gfx9 - * hardware too. - */ - if (pipeline == BRW_COMPUTE_PIPELINE) { - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - - brw->ctx.NewDriverState |= BRW_NEW_CC_STATE; - } - } - - if (devinfo->ver == 9 && pipeline == BRW_RENDER_PIPELINE) { - /* We seem to have issues with geometry flickering when 3D and compute - * are combined in the same batch and this appears to fix it. 
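- * (The packet below is a MEDIA_VFE_STATE programming the maximum thread
- * count and a minimal URB configuration before the 3D pipeline is
- * selected further down.)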
- */ - const uint32_t maxNumberofThreads = - devinfo->max_cs_threads * devinfo->subslice_total - 1; - - BEGIN_BATCH(9); - OUT_BATCH(MEDIA_VFE_STATE << 16 | (9 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(2 << 8 | maxNumberofThreads << 16); - OUT_BATCH(0); - OUT_BATCH(2 << 16); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - if (devinfo->ver >= 6) { - /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] - * PIPELINE_SELECT [DevBWR+]": - * - * Project: DEVSNB+ - * - * Software must ensure all the write caches are flushed through a - * stalling PIPE_CONTROL command followed by another PIPE_CONTROL - * command to invalidate read only caches prior to programming - * MI_PIPELINE_SELECT command to change the Pipeline Select Mode. - */ - const unsigned dc_flush = - devinfo->ver >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0; - - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - dc_flush | - PIPE_CONTROL_CS_STALL); - - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_CONST_CACHE_INVALIDATE | - PIPE_CONTROL_STATE_CACHE_INVALIDATE | - PIPE_CONTROL_INSTRUCTION_INVALIDATE); - - } else { - /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] - * PIPELINE_SELECT [DevBWR+]": - * - * Project: PRE-DEVSNB - * - * Software must ensure the current pipeline is flushed via an - * MI_FLUSH or PIPE_CONTROL prior to the execution of PIPELINE_SELECT. - */ - BEGIN_BATCH(1); - OUT_BATCH(MI_FLUSH); - ADVANCE_BATCH(); - } - - /* Select the pipeline */ - BEGIN_BATCH(1); - OUT_BATCH(_3DSTATE_PIPELINE_SELECT << 16 | - (devinfo->ver >= 9 ? (3 << 8) : 0) | - (pipeline == BRW_COMPUTE_PIPELINE ? 2 : 0)); - ADVANCE_BATCH(); - - if (devinfo->verx10 == 70 && - pipeline == BRW_RENDER_PIPELINE) { - /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] - * PIPELINE_SELECT [DevBWR+]": - * - * Project: DEVIVB, DEVHSW:GT3:A0 - * - * Software must send a pipe_control with a CS stall and a post sync - * operation and then a dummy DRAW after every MI_SET_CONTEXT and - * after any PIPELINE_SELECT that is enabling 3D mode. - */ - gfx7_emit_cs_stall_flush(brw); - - BEGIN_BATCH(7); - OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); - OUT_BATCH(_3DPRIM_POINTLIST); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - if (devinfo->platform == INTEL_PLATFORM_GLK) { - /* Project: DevGLK - * - * "This chicken bit works around a hardware issue with barrier logic - * encountered when switching between GPGPU and 3D pipelines. To - * workaround the issue, this mode bit should be set after a pipeline - * is selected." - */ - const unsigned barrier_mode = - pipeline == BRW_RENDER_PIPELINE ? GLK_SCEC_BARRIER_MODE_3D_HULL - : GLK_SCEC_BARRIER_MODE_GPGPU; - brw_load_register_imm32(brw, SLICE_COMMON_ECO_CHICKEN1, - barrier_mode | GLK_SCEC_BARRIER_MODE_MASK); - } -} - -/** - * Update the pixel hashing modes that determine the balancing of PS threads - * across subslices and slices. - * - * \param width Width bound of the rendering area (already scaled down if \p - * scale is greater than 1). - * \param height Height bound of the rendering area (already scaled down if \p - * scale is greater than 1). - * \param scale The number of framebuffer samples that could potentially be - * affected by an individual channel of the PS thread. 
This is - * typically one for single-sampled rendering, but for operations - * like CCS resolves and fast clears a single PS invocation may - * update a huge number of pixels, in which case a finer - * balancing is desirable in order to maximally utilize the - * bandwidth available. UINT_MAX can be used as shorthand for - * "finest hashing mode available". - */ -void -brw_emit_hashing_mode(struct brw_context *brw, unsigned width, - unsigned height, unsigned scale) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->ver == 9) { - const uint32_t slice_hashing[] = { - /* Because all Gfx9 platforms with more than one slice require - * three-way subslice hashing, a single "normal" 16x16 slice hashing - * block is guaranteed to suffer from substantial imbalance, with one - * subslice receiving twice as much work as the other two in the - * slice. - * - * The performance impact of that would be particularly severe when - * three-way hashing is also in use for slice balancing (which is the - * case for all Gfx9 GT4 platforms), because one of the slices - * receives one every three 16x16 blocks in either direction, which - * is roughly the periodicity of the underlying subslice imbalance - * pattern ("roughly" because in reality the hardware's - * implementation of three-way hashing doesn't do exact modulo 3 - * arithmetic, which somewhat decreases the magnitude of this effect - * in practice). This leads to a systematic subslice imbalance - * within that slice regardless of the size of the primitive. The - * 32x32 hashing mode guarantees that the subslice imbalance within a - * single slice hashing block is minimal, largely eliminating this - * effect. - */ - GFX9_SLICE_HASHING_32x32, - /* Finest slice hashing mode available. */ - GFX9_SLICE_HASHING_NORMAL - }; - const uint32_t subslice_hashing[] = { - /* The 16x16 subslice hashing mode is used on non-LLC platforms to - * match the performance of previous Mesa versions. 16x16 has a - * slight cache locality benefit especially visible in the sampler L1 - * cache efficiency of low-bandwidth platforms, but it comes at the - * cost of greater subslice imbalance for primitives of dimensions - * approximately intermediate between 16x4 and 16x16. - */ - (devinfo->has_llc ? GFX9_SUBSLICE_HASHING_16x4 : - GFX9_SUBSLICE_HASHING_16x16), - /* Finest subslice hashing mode available. */ - GFX9_SUBSLICE_HASHING_8x4 - }; - /* Dimensions of the smallest hashing block of a given hashing mode. If - * the rendering area is smaller than this there can't possibly be any - * benefit from switching to this mode, so we optimize out the - * transition. - */ - const unsigned min_size[][2] = { - { 16, 4 }, - { 8, 4 } - }; - const unsigned idx = scale > 1; - - if (width > min_size[idx][0] || height > min_size[idx][1]) { - const uint32_t gt_mode = - (devinfo->num_slices == 1 ? 
0 : - GFX9_SLICE_HASHING_MASK_BITS | slice_hashing[idx]) | - GFX9_SUBSLICE_HASHING_MASK_BITS | subslice_hashing[idx]; - - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_STALL_AT_SCOREBOARD | - PIPE_CONTROL_CS_STALL); - - brw_load_register_imm32(brw, GFX7_GT_MODE, gt_mode); - - brw->current_hash_scale = scale; - } - } -} - -/** - * Misc invariant state packets - */ -void -brw_upload_invariant_state(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const bool is_965 = devinfo->verx10 == 40; - - brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE); - brw->last_pipeline = BRW_RENDER_PIPELINE; - - if (devinfo->ver >= 8) { - BEGIN_BATCH(3); - OUT_BATCH(CMD_STATE_SIP << 16 | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(2); - OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* Original Gfx4 doesn't have 3DSTATE_AA_LINE_PARAMETERS. */ - if (!is_965) { - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2)); - /* use legacy aa line coverage computation */ - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } -} diff --git a/src/mesa/drivers/dri/i965/brw_multisample_state.h b/src/mesa/drivers/dri/i965/brw_multisample_state.h deleted file mode 100644 index 2142a17..0000000 --- a/src/mesa/drivers/dri/i965/brw_multisample_state.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef BRW_MULTISAMPLE_STATE_H -#define BRW_MULTISAMPLE_STATE_H - -#include - -/** - * Note: There are no standard multisample positions defined in OpenGL - * specifications. Implementations have the freedom to pick the positions - * which give plausible results. But the Vulkan specification does define - * standard sample positions. So, we decided to pick the same pattern in - * OpenGL as in Vulkan to keep it uniform across drivers and also to avoid - * breaking applications which rely on this standard pattern. - */ - -/** - * 1x MSAA has a single sample at the center: (0.5, 0.5) -> (0x8, 0x8). 
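- *
- * (Each coordinate is a 4-bit fixed-point value in units of 1/16 pixel,
- * so 0.5 -> 0x8 and 0.75 -> 0xc; the X/Y positions of all samples are
- * packed together into the dwords below.)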
- * - * 2x MSAA sample positions are (0.75, 0.75) and (0.25, 0.25): - * 4 c - * 4 1 - * c 0 - */ -static const uint32_t -brw_multisample_positions_1x_2x = 0x008844cc; - -/** - * Sample positions: - * 2 6 a e - * 2 0 - * 6 1 - * a 2 - * e 3 - */ -static const uint32_t -brw_multisample_positions_4x = 0xae2ae662; - -/** - * Sample positions: - * - * From the Ivy Bridge PRM, Vol2 Part1 p304 (3DSTATE_MULTISAMPLE: - * Programming Notes): - * "When programming the sample offsets (for NUMSAMPLES_4 or _8 and - * MSRASTMODE_xxx_PATTERN), the order of the samples 0 to 3 (or 7 - * for 8X) must have monotonically increasing distance from the - * pixel center. This is required to get the correct centroid - * computation in the device." - * - * Sample positions: - * 1 3 5 7 9 b d f - * 1 7 - * 3 3 - * 5 0 - * 7 5 - * 9 2 - * b 1 - * d 4 - * f 6 - */ -static const uint32_t -brw_multisample_positions_8x[] = { 0x53d97b95, 0xf1bf173d }; - -/** - * Sample positions: - * - * 0 1 2 3 4 5 6 7 8 9 a b c d e f - * 0 15 - * 1 9 - * 2 10 - * 3 7 - * 4 13 - * 5 1 - * 6 4 - * 7 3 - * 8 12 - * 9 0 - * a 2 - * b 6 - * c 11 - * d 5 - * e 8 - * f 14 - */ -static const uint32_t -brw_multisample_positions_16x[] = { - 0xc75a7599, 0xb3dbad36, 0x2c42816e, 0x10eff408 -}; - -#endif /* BRW_MULTISAMPLE_STATE_H */ diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp deleted file mode 100644 index 8ef67ff..0000000 --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp +++ /dev/null @@ -1,450 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "compiler/brw_nir.h" -#include "compiler/glsl/ir_uniform.h" -#include "compiler/nir/nir_builder.h" -#include "brw_program.h" - -static void -brw_nir_setup_glsl_builtin_uniform(nir_variable *var, - const struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - bool is_scalar) -{ - const nir_state_slot *const slots = var->state_slots; - assert(var->state_slots != NULL); - - unsigned uniform_index = var->data.driver_location / 4; - for (unsigned int i = 0; i < var->num_state_slots; i++) { - /* This state reference has already been setup by ir_to_mesa, but we'll - * get the same index back here. - */ - int index = _mesa_add_state_reference(prog->Parameters, - slots[i].tokens); - - /* Add each of the unique swizzles of the element as a parameter. 
- * This'll end up matching the expected layout of the - * array/matrix/structure we're trying to fill in. - */ - int last_swiz = -1; - for (unsigned j = 0; j < 4; j++) { - int swiz = GET_SWZ(slots[i].swizzle, j); - - /* If we hit a pair of identical swizzles, this means we've hit the - * end of the builtin variable. In scalar mode, we should just quit - * and move on to the next one. In vec4, we need to continue and pad - * it out to 4 components. - */ - if (swiz == last_swiz && is_scalar) - break; - - last_swiz = swiz; - - stage_prog_data->param[uniform_index++] = - BRW_PARAM_PARAMETER(index, swiz); - } - } -} - -static void -setup_vec4_image_param(uint32_t *params, uint32_t idx, - unsigned offset, unsigned n) -{ - assert(offset % sizeof(uint32_t) == 0); - for (unsigned i = 0; i < n; ++i) - params[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i); - - for (unsigned i = n; i < 4; ++i) - params[i] = BRW_PARAM_BUILTIN_ZERO; -} - -static void -brw_setup_image_uniform_values(nir_variable *var, - struct brw_stage_prog_data *prog_data) -{ - unsigned param_start_index = var->data.driver_location / 4; - uint32_t *param = &prog_data->param[param_start_index]; - unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size()); - - for (unsigned i = 0; i < num_images; i++) { - const unsigned image_idx = var->data.binding + i; - - /* Upload the brw_image_param structure. The order is expected to match - * the BRW_IMAGE_PARAM_*_OFFSET defines. - */ - setup_vec4_image_param(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, - image_idx, - offsetof(brw_image_param, offset), 2); - setup_vec4_image_param(param + BRW_IMAGE_PARAM_SIZE_OFFSET, - image_idx, - offsetof(brw_image_param, size), 3); - setup_vec4_image_param(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, - image_idx, - offsetof(brw_image_param, stride), 4); - setup_vec4_image_param(param + BRW_IMAGE_PARAM_TILING_OFFSET, - image_idx, - offsetof(brw_image_param, tiling), 3); - setup_vec4_image_param(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, - image_idx, - offsetof(brw_image_param, swizzling), 2); - param += BRW_IMAGE_PARAM_SIZE; - } -} - -static unsigned -count_uniform_storage_slots(const struct glsl_type *type) -{ - /* gl_uniform_storage can cope with one level of array, so if the - * type is a composite type or an array where each element occupies - * more than one slot than we need to recursively process it. - */ - if (glsl_type_is_struct_or_ifc(type)) { - unsigned location_count = 0; - - for (unsigned i = 0; i < glsl_get_length(type); i++) { - const struct glsl_type *field_type = glsl_get_struct_field(type, i); - - location_count += count_uniform_storage_slots(field_type); - } - - return location_count; - } - - if (glsl_type_is_array(type)) { - const struct glsl_type *element_type = glsl_get_array_element(type); - - if (glsl_type_is_array(element_type) || - glsl_type_is_struct_or_ifc(element_type)) { - unsigned element_count = count_uniform_storage_slots(element_type); - return element_count * glsl_get_length(type); - } - } - - return 1; -} - -static void -brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var, - const struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - bool is_scalar) -{ - if (var->type->without_array()->is_sampler() || - var->type->without_array()->is_image()) - return; - - /* The data for our (non-builtin) uniforms is stored in a series of - * gl_uniform_storage structs for each subcomponent that - * glGetUniformLocation() could name. 
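- * (For example, a hypothetical "uniform mat2 m;" occupies one storage
- * slot with vector_count == 2 and vector_size == 2, contributing four
- * BRW_PARAM_UNIFORM entries in scalar mode and two zero-padded vec4s in
- * vec4 mode.)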
We know it's been set up in the same - * order we'd walk the type, so walk the list of storage that matches the - * range of slots covered by this variable. - */ - unsigned uniform_index = var->data.driver_location / 4; - unsigned num_slots = count_uniform_storage_slots(var->type); - for (unsigned u = 0; u < num_slots; u++) { - struct gl_uniform_storage *storage = - &prog->sh.data->UniformStorage[var->data.location + u]; - - /* We already handled samplers and images via the separate top-level - * variables created by gl_nir_lower_samplers_as_deref(), but they're - * still part of the structure's storage, and so we'll see them while - * walking it to set up the other regular fields. Just skip over them. - */ - if (storage->builtin || - storage->type->is_sampler() || - storage->type->is_image()) - continue; - - gl_constant_value *components = storage->storage; - unsigned vector_count = (MAX2(storage->array_elements, 1) * - storage->type->matrix_columns); - unsigned vector_size = storage->type->vector_elements; - unsigned max_vector_size = 4; - if (storage->type->base_type == GLSL_TYPE_DOUBLE || - storage->type->base_type == GLSL_TYPE_UINT64 || - storage->type->base_type == GLSL_TYPE_INT64) { - vector_size *= 2; - if (vector_size > 4) - max_vector_size = 8; - } - - for (unsigned s = 0; s < vector_count; s++) { - unsigned i; - for (i = 0; i < vector_size; i++) { - uint32_t idx = components - prog->sh.data->UniformDataSlots; - stage_prog_data->param[uniform_index++] = BRW_PARAM_UNIFORM(idx); - components++; - } - - if (!is_scalar) { - /* Pad out with zeros if needed (only needed for vec4) */ - for (; i < max_vector_size; i++) { - stage_prog_data->param[uniform_index++] = - BRW_PARAM_BUILTIN_ZERO; - } - } - } - } -} - -void -brw_nir_setup_glsl_uniforms(void *mem_ctx, nir_shader *shader, - const struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - bool is_scalar) -{ - unsigned nr_params = shader->num_uniforms / 4; - stage_prog_data->nr_params = nr_params; - stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params); - - nir_foreach_uniform_variable(var, shader) { - /* UBO's, atomics and samplers don't take up space in the - uniform file */ - if (var->interface_type != NULL || var->type->contains_atomic()) - continue; - - if (var->num_state_slots > 0) { - brw_nir_setup_glsl_builtin_uniform(var, prog, stage_prog_data, - is_scalar); - } else { - brw_nir_setup_glsl_uniform(shader->info.stage, var, prog, - stage_prog_data, is_scalar); - } - } - - nir_foreach_image_variable(var, shader) - brw_setup_image_uniform_values(var, stage_prog_data); -} - -void -brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader, - struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data) -{ - struct gl_program_parameter_list *plist = prog->Parameters; - - unsigned nr_params = plist->NumParameters * 4; - stage_prog_data->nr_params = nr_params; - stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params); - - /* For ARB programs, prog_to_nir generates a single "parameters" variable - * for all uniform data. There may be additional sampler variables, and - * an extra uniform from nir_lower_wpos_ytransform. - */ - - for (unsigned p = 0; p < plist->NumParameters; p++) { - /* Parameters should be either vec4 uniforms or single component - * constants; matrices and other larger types should have been broken - * down earlier. 
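- * (A reference such as "state.matrix.mvp", for instance, reaches this
- * loop as four separate vec4 row parameters rather than one matrix
- * entry.)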
- */ - assert(plist->Parameters[p].Size <= 4); - - unsigned i; - for (i = 0; i < plist->Parameters[p].Size; i++) - stage_prog_data->param[4 * p + i] = BRW_PARAM_PARAMETER(p, i); - for (; i < 4; i++) - stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO; - } -} - -static nir_ssa_def * -get_aoa_deref_offset(nir_builder *b, - nir_deref_instr *deref, - unsigned elem_size) -{ - unsigned array_size = elem_size; - nir_ssa_def *offset = nir_imm_int(b, 0); - - while (deref->deref_type != nir_deref_type_var) { - assert(deref->deref_type == nir_deref_type_array); - - /* This level's element size is the previous level's array size */ - nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1); - assert(deref->arr.index.ssa); - offset = nir_iadd(b, offset, - nir_imul(b, index, nir_imm_int(b, array_size))); - - deref = nir_deref_instr_parent(deref); - assert(glsl_type_is_array(deref->type)); - array_size *= glsl_get_length(deref->type); - } - - /* Accessing an invalid surface index with the dataport can result in a - * hang. According to the spec "if the index used to select an individual - * element is negative or greater than or equal to the size of the array, - * the results of the operation are undefined but may not lead to - * termination" -- which is one of the possible outcomes of the hang. - * Clamp the index to prevent access outside of the array bounds. - */ - return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size)); -} - -void -brw_nir_lower_gl_images(nir_shader *shader, - const struct gl_program *prog) -{ - /* We put image uniforms at the end */ - nir_foreach_image_variable(var, shader) { - const unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size()); - - var->data.driver_location = shader->num_uniforms; - shader->num_uniforms += num_images * BRW_IMAGE_PARAM_SIZE * 4; - } - - nir_function_impl *impl = nir_shader_get_entrypoint(shader); - - nir_builder b; - nir_builder_init(&b, impl); - - nir_foreach_block(block, impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - switch (intrin->intrinsic) { - case nir_intrinsic_image_deref_load: - case nir_intrinsic_image_deref_store: - case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_imin: - case nir_intrinsic_image_deref_atomic_umin: - case nir_intrinsic_image_deref_atomic_imax: - case nir_intrinsic_image_deref_atomic_umax: - case nir_intrinsic_image_deref_atomic_and: - case nir_intrinsic_image_deref_atomic_or: - case nir_intrinsic_image_deref_atomic_xor: - case nir_intrinsic_image_deref_atomic_exchange: - case nir_intrinsic_image_deref_atomic_comp_swap: - case nir_intrinsic_image_deref_size: - case nir_intrinsic_image_deref_samples: - case nir_intrinsic_image_deref_load_raw_intel: - case nir_intrinsic_image_deref_store_raw_intel: { - nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); - nir_variable *var = nir_deref_instr_get_variable(deref); - - struct gl_uniform_storage *storage = - &prog->sh.data->UniformStorage[var->data.location]; - const unsigned image_var_idx = - storage->opaque[shader->info.stage].index; - - b.cursor = nir_before_instr(&intrin->instr); - nir_ssa_def *index = nir_iadd(&b, nir_imm_int(&b, image_var_idx), - get_aoa_deref_offset(&b, deref, 1)); - nir_rewrite_image_intrinsic(intrin, index, false); - break; - } - - case nir_intrinsic_image_deref_load_param_intel: { - nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); - nir_variable *var 
= nir_deref_instr_get_variable(deref); - const unsigned num_images = - MAX2(1, var->type->arrays_of_arrays_size()); - - b.cursor = nir_instr_remove(&intrin->instr); - - const unsigned param = nir_intrinsic_base(intrin); - nir_ssa_def *offset = - get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4); - offset = nir_iadd(&b, offset, nir_imm_int(&b, param * 16)); - - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b.shader, - nir_intrinsic_load_uniform); - nir_intrinsic_set_base(load, var->data.driver_location); - nir_intrinsic_set_range(load, num_images * BRW_IMAGE_PARAM_SIZE * 4); - load->src[0] = nir_src_for_ssa(offset); - load->num_components = intrin->dest.ssa.num_components; - nir_ssa_dest_init(&load->instr, &load->dest, - intrin->dest.ssa.num_components, - intrin->dest.ssa.bit_size, NULL); - nir_builder_instr_insert(&b, &load->instr); - - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - &load->dest.ssa); - break; - } - - default: - break; - } - } - } -} - -void -brw_nir_lower_legacy_clipping(nir_shader *nir, int nr_userclip_plane_consts, - struct brw_stage_prog_data *prog_data) -{ - if (nr_userclip_plane_consts == 0) - return; - - nir_function_impl *impl = nir_shader_get_entrypoint(nir); - - nir_lower_clip_vs(nir, (1 << nr_userclip_plane_consts) - 1, true, false, - NULL); - nir_lower_io_to_temporaries(nir, impl, true, false); - nir_lower_global_vars_to_local(nir); - nir_lower_vars_to_ssa(nir); - - const unsigned clip_plane_base = nir->num_uniforms; - - assert(nir->num_uniforms == prog_data->nr_params * 4); - const unsigned num_clip_floats = 4 * nr_userclip_plane_consts; - uint32_t *clip_param = - brw_stage_prog_data_add_params(prog_data, num_clip_floats); - nir->num_uniforms += num_clip_floats * sizeof(float); - assert(nir->num_uniforms == prog_data->nr_params * 4); - - for (unsigned i = 0; i < num_clip_floats; i++) - clip_param[i] = BRW_PARAM_BUILTIN_CLIP_PLANE(i / 4, i % 4); - - nir_builder b; - nir_builder_init(&b, impl); - nir_foreach_block(block, impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (intrin->intrinsic != nir_intrinsic_load_user_clip_plane) - continue; - - b.cursor = nir_before_instr(instr); - - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform); - load->num_components = 4; - load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); - nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL); - nir_intrinsic_set_base(load, clip_plane_base + 4 * sizeof(float) * - nir_intrinsic_ucp_id(intrin)); - nir_intrinsic_set_range(load, 4 * sizeof(float)); - nir_builder_instr_insert(&b, &load->instr); - - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - &load->dest.ssa); - nir_instr_remove(instr); - } - } -} diff --git a/src/mesa/drivers/dri/i965/brw_object_purgeable.c b/src/mesa/drivers/dri/i965/brw_object_purgeable.c deleted file mode 100644 index 104454e..0000000 --- a/src/mesa/drivers/dri/i965/brw_object_purgeable.c +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject 
to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * @file brw_object_purgeable.c - * - * The driver implementation of the GL_APPLE_object_purgeable extension. - */ - -#include "main/mtypes.h" -#include "main/macros.h" -#include "main/bufferobj.h" - -#include "brw_context.h" -#include "brw_buffer_objects.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" - -static GLenum -brw_buffer_purgeable(struct brw_bo *buffer) -{ - int retained = 0; - - if (buffer != NULL) - retained = brw_bo_madvise(buffer, I915_MADV_DONTNEED); - - return retained ? GL_VOLATILE_APPLE : GL_RELEASED_APPLE; -} - -static GLenum -brw_buffer_object_purgeable(struct gl_context * ctx, - struct gl_buffer_object *obj, - GLenum option) -{ - struct brw_buffer_object *intel_obj = brw_buffer_object(obj); - - if (intel_obj->buffer != NULL) - return brw_buffer_purgeable(intel_obj->buffer); - - if (option == GL_RELEASED_APPLE) { - return GL_RELEASED_APPLE; - } else { - /* XXX Create the buffer and madvise(MADV_DONTNEED)? */ - return brw_buffer_purgeable(intel_obj->buffer); - } -} - -static GLenum -brw_texture_object_purgeable(struct gl_context * ctx, - struct gl_texture_object *obj, - GLenum option) -{ - struct brw_texture_object *intel; - - (void) ctx; - (void) option; - - intel = brw_texture_object(obj); - if (intel->mt == NULL || intel->mt->bo == NULL) - return GL_RELEASED_APPLE; - - return brw_buffer_purgeable(intel->mt->bo); -} - -static GLenum -brw_render_object_purgeable(struct gl_context * ctx, - struct gl_renderbuffer *obj, - GLenum option) -{ - struct brw_renderbuffer *intel; - - (void) ctx; - (void) option; - - intel = brw_renderbuffer(obj); - if (intel->mt == NULL) - return GL_RELEASED_APPLE; - - return brw_buffer_purgeable(intel->mt->bo); -} - -static int -brw_bo_unpurgeable(struct brw_bo *buffer) -{ - int retained; - - retained = 0; - if (buffer != NULL) - retained = brw_bo_madvise(buffer, I915_MADV_WILLNEED); - - return retained; -} - -static GLenum -brw_buffer_object_unpurgeable(struct gl_context * ctx, - struct gl_buffer_object *obj, - GLenum option) -{ - struct brw_buffer_object *intel = brw_buffer_object(obj); - - (void) ctx; - - if (!intel->buffer) - return GL_UNDEFINED_APPLE; - - if (option == GL_UNDEFINED_APPLE || !brw_bo_unpurgeable(intel->buffer)) { - brw_bo_unreference(intel->buffer); - intel->buffer = NULL; - return GL_UNDEFINED_APPLE; - } - - return GL_RETAINED_APPLE; -} - -static GLenum -brw_texture_object_unpurgeable(struct gl_context * ctx, - struct gl_texture_object *obj, - GLenum option) -{ - struct brw_texture_object *intel; - - (void) ctx; - - intel = brw_texture_object(obj); - if (intel->mt == NULL || intel->mt->bo == NULL) - return GL_UNDEFINED_APPLE; - - if (option == GL_UNDEFINED_APPLE || !brw_bo_unpurgeable(intel->mt->bo)) { - brw_miptree_release(&intel->mt); - return GL_UNDEFINED_APPLE; - } - - return GL_RETAINED_APPLE; -} - -static GLenum 
-brw_render_object_unpurgeable(struct gl_context * ctx, - struct gl_renderbuffer *obj, - GLenum option) -{ - struct brw_renderbuffer *intel; - - (void) ctx; - - intel = brw_renderbuffer(obj); - if (intel->mt == NULL) - return GL_UNDEFINED_APPLE; - - if (option == GL_UNDEFINED_APPLE || !brw_bo_unpurgeable(intel->mt->bo)) { - brw_miptree_release(&intel->mt); - return GL_UNDEFINED_APPLE; - } - - return GL_RETAINED_APPLE; -} - -void -brw_init_object_purgeable_functions(struct dd_function_table *functions) -{ - functions->BufferObjectPurgeable = brw_buffer_object_purgeable; - functions->TextureObjectPurgeable = brw_texture_object_purgeable; - functions->RenderObjectPurgeable = brw_render_object_purgeable; - - functions->BufferObjectUnpurgeable = brw_buffer_object_unpurgeable; - functions->TextureObjectUnpurgeable = brw_texture_object_unpurgeable; - functions->RenderObjectUnpurgeable = brw_render_object_unpurgeable; -} diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c deleted file mode 100644 index 43bd4d6..0000000 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ /dev/null @@ -1,533 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file brw_performance_query.c - * - * Implementation of the GL_INTEL_performance_query extension. - * - * Currently there are two possible counter sources exposed here: - * - * On Gfx6+ hardware we have numerous 64bit Pipeline Statistics Registers - * that we can snapshot at the beginning and end of a query. - * - * On Gfx7.5+ we have Observability Architecture counters which are - * covered in a separate document from the rest of the PRMs. 
It is available at: - * https://01.org/linuxgraphics/documentation/driver-documentation-prms - * => 2013 Intel Core Processor Family => Observability Performance Counters - * (This one volume covers Sandybridge, Ivybridge, Baytrail, and Haswell, - * though notably we currently only support OA counters for Haswell+) - */ - -#include <limits.h> - -/* put before sys/types.h to silence glibc warnings */ -#ifdef MAJOR_IN_MKDEV -#include <sys/mkdev.h> -#endif -#ifdef MAJOR_IN_SYSMACROS -#include <sys/sysmacros.h> -#endif -#include <sys/stat.h> -#include <sys/types.h> -#include <fcntl.h> -#include <unistd.h> -#include <errno.h> - -#include <xf86drm.h> -#include "drm-uapi/i915_drm.h" - -#include "main/hash.h" -#include "main/macros.h" -#include "main/mtypes.h" -#include "main/performance_query.h" - -#include "util/bitset.h" -#include "util/ralloc.h" -#include "util/hash_table.h" -#include "util/list.h" -#include "util/u_math.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_batch.h" - -#include "perf/intel_perf.h" -#include "perf/intel_perf_regs.h" -#include "perf/intel_perf_mdapi.h" -#include "perf/intel_perf_query.h" - -#define FILE_DEBUG_FLAG DEBUG_PERFMON - -#define OAREPORT_REASON_MASK 0x3f -#define OAREPORT_REASON_SHIFT 19 -#define OAREPORT_REASON_TIMER (1<<0) -#define OAREPORT_REASON_TRIGGER1 (1<<1) -#define OAREPORT_REASON_TRIGGER2 (1<<2) -#define OAREPORT_REASON_CTX_SWITCH (1<<3) -#define OAREPORT_REASON_GO_TRANSITION (1<<4) - -struct brw_perf_query_object { - struct gl_perf_query_object base; - struct intel_perf_query_object *query; -}; - -/** Downcasting convenience macro. */ -static inline struct brw_perf_query_object * -brw_perf_query(struct gl_perf_query_object *o) -{ - return (struct brw_perf_query_object *) o; -} - -#define MI_RPC_BO_SIZE 4096 -#define MI_RPC_BO_END_OFFSET_BYTES (MI_RPC_BO_SIZE / 2) -#define MI_FREQ_START_OFFSET_BYTES (3072) -#define MI_FREQ_END_OFFSET_BYTES (3076) - -/******************************************************************************/ - -static bool -brw_is_perf_query_ready(struct gl_context *ctx, - struct gl_perf_query_object *o); - -static void -dump_perf_query_callback(void *query_void, void *brw_void) -{ - struct brw_context *ctx = brw_void; - struct intel_perf_context *perf_ctx = ctx->perf_ctx; - struct gl_perf_query_object *o = query_void; - struct brw_perf_query_object * brw_query = brw_perf_query(o); - struct intel_perf_query_object *obj = brw_query->query; - - DBG("%4d: %-6s %-8s ", - o->Id, - o->Used ? "Dirty," : "New,", - o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,")); - intel_perf_dump_query(perf_ctx, obj, &ctx->batch); -} - -static void -dump_perf_queries(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - intel_perf_dump_query_count(brw->perf_ctx); - _mesa_HashWalk(ctx->PerfQuery.Objects, dump_perf_query_callback, brw); -} - -/** - * Driver hook for glGetPerfQueryInfoINTEL(). 
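 *
 * (Editor's sketch, not part of the original source: on the API side an
 * application enumerates queries by walking the id chain, e.g.
 *
 *    GLuint qid;
 *    glGetFirstPerfQueryIdINTEL(&qid);
 *    while (qid != 0) {
 *       GLchar name[256];
 *       GLuint data_size, n_counters, n_instances, caps_mask;
 *       glGetPerfQueryInfoINTEL(qid, sizeof(name), name, &data_size,
 *                               &n_counters, &n_instances, &caps_mask);
 *       glGetNextPerfQueryIdINTEL(qid, &qid);
 *    }
 *
 * Core Mesa resolves the id to the query_index passed to this hook.)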
- */ -static void -brw_get_perf_query_info(struct gl_context *ctx, - unsigned query_index, - const char **name, - GLuint *data_size, - GLuint *n_counters, - GLuint *n_active) -{ - struct brw_context *brw = brw_context(ctx); - struct intel_perf_context *perf_ctx = brw->perf_ctx; - struct intel_perf_config *perf_cfg = intel_perf_config(perf_ctx); - const struct intel_perf_query_info *query = &perf_cfg->queries[query_index]; - - *name = query->name; - *data_size = query->data_size; - *n_counters = query->n_counters; - *n_active = intel_perf_active_queries(perf_ctx, query); -} - -static GLuint -intel_counter_type_enum_to_gl_type(enum intel_perf_counter_type type) -{ - switch (type) { - case INTEL_PERF_COUNTER_TYPE_EVENT: return GL_PERFQUERY_COUNTER_EVENT_INTEL; - case INTEL_PERF_COUNTER_TYPE_DURATION_NORM: return GL_PERFQUERY_COUNTER_DURATION_NORM_INTEL; - case INTEL_PERF_COUNTER_TYPE_DURATION_RAW: return GL_PERFQUERY_COUNTER_DURATION_RAW_INTEL; - case INTEL_PERF_COUNTER_TYPE_THROUGHPUT: return GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL; - case INTEL_PERF_COUNTER_TYPE_RAW: return GL_PERFQUERY_COUNTER_RAW_INTEL; - case INTEL_PERF_COUNTER_TYPE_TIMESTAMP: return GL_PERFQUERY_COUNTER_TIMESTAMP_INTEL; - default: - unreachable("Unknown counter type"); - } -} - -static GLuint -intel_counter_data_type_to_gl_type(enum intel_perf_counter_data_type type) -{ - switch (type) { - case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32: return GL_PERFQUERY_COUNTER_DATA_BOOL32_INTEL; - case INTEL_PERF_COUNTER_DATA_TYPE_UINT32: return GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL; - case INTEL_PERF_COUNTER_DATA_TYPE_UINT64: return GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL; - case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT: return GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL; - case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: return GL_PERFQUERY_COUNTER_DATA_DOUBLE_INTEL; - default: - unreachable("Unknown counter data type"); - } -} - -/** - * Driver hook for glGetPerfCounterInfoINTEL(). - */ -static void -brw_get_perf_counter_info(struct gl_context *ctx, - unsigned query_index, - unsigned counter_index, - const char **name, - const char **desc, - GLuint *offset, - GLuint *data_size, - GLuint *type_enum, - GLuint *data_type_enum, - GLuint64 *raw_max) -{ - struct brw_context *brw = brw_context(ctx); - struct intel_perf_config *perf_cfg = intel_perf_config(brw->perf_ctx); - const struct intel_perf_query_info *query = - &perf_cfg->queries[query_index]; - const struct intel_perf_query_counter *counter = - &query->counters[counter_index]; - - *name = counter->name; - *desc = counter->desc; - *offset = counter->offset; - *data_size = intel_perf_query_counter_get_size(counter); - *type_enum = intel_counter_type_enum_to_gl_type(counter->type); - *data_type_enum = intel_counter_data_type_to_gl_type(counter->data_type); - *raw_max = counter->raw_max; -} - -enum OaReadStatus { - OA_READ_STATUS_ERROR, - OA_READ_STATUS_UNFINISHED, - OA_READ_STATUS_FINISHED, -}; - -/******************************************************************************/ - -/** - * Driver hook for glBeginPerfQueryINTEL(). - */ -static bool -brw_begin_perf_query(struct gl_context *ctx, - struct gl_perf_query_object *o) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_perf_query_object *brw_query = brw_perf_query(o); - struct intel_perf_query_object *obj = brw_query->query; - struct intel_perf_context *perf_ctx = brw->perf_ctx; - - /* We can assume the frontend hides mistaken attempts to Begin a - * query object multiple times before its End. 
Similarly if an - * application reuses a query object before results have arrived - * the frontend will wait for prior results so we don't need - * to support abandoning in-flight results. - */ - assert(!o->Active); - assert(!o->Used || o->Ready); /* no in-flight query to worry about */ - - DBG("Begin(%d)\n", o->Id); - - bool ret = intel_perf_begin_query(perf_ctx, obj); - - if (INTEL_DEBUG(DEBUG_PERFMON)) - dump_perf_queries(brw); - - return ret; -} - -/** - * Driver hook for glEndPerfQueryINTEL(). - */ -static void -brw_end_perf_query(struct gl_context *ctx, - struct gl_perf_query_object *o) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_perf_query_object *brw_query = brw_perf_query(o); - struct intel_perf_query_object *obj = brw_query->query; - struct intel_perf_context *perf_ctx = brw->perf_ctx; - - DBG("End(%d)\n", o->Id); - intel_perf_end_query(perf_ctx, obj); -} - -static void -brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_perf_query_object *brw_query = brw_perf_query(o); - struct intel_perf_query_object *obj = brw_query->query; - - assert(!o->Ready); - - intel_perf_wait_query(brw->perf_ctx, obj, &brw->batch); -} - -static bool -brw_is_perf_query_ready(struct gl_context *ctx, - struct gl_perf_query_object *o) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_perf_query_object *brw_query = brw_perf_query(o); - struct intel_perf_query_object *obj = brw_query->query; - - if (o->Ready) - return true; - - return intel_perf_is_query_ready(brw->perf_ctx, obj, &brw->batch); -} - -/** - * Driver hook for glGetPerfQueryDataINTEL(). - */ -static bool -brw_get_perf_query_data(struct gl_context *ctx, - struct gl_perf_query_object *o, - GLsizei data_size, - GLuint *data, - GLuint *bytes_written) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_perf_query_object *brw_query = brw_perf_query(o); - struct intel_perf_query_object *obj = brw_query->query; - - assert(brw_is_perf_query_ready(ctx, o)); - - DBG("GetData(%d)\n", o->Id); - - if (INTEL_DEBUG(DEBUG_PERFMON)) - dump_perf_queries(brw); - - /* We expect that the frontend only calls this hook when it knows - * that results are available. - */ - assert(o->Ready); - - intel_perf_get_query_data(brw->perf_ctx, obj, &brw->batch, - data_size, data, bytes_written); - - return true; -} - -static struct gl_perf_query_object * -brw_new_perf_query_object(struct gl_context *ctx, unsigned query_index) -{ - struct brw_context *brw = brw_context(ctx); - struct intel_perf_context *perf_ctx = brw->perf_ctx; - struct intel_perf_query_object * obj = intel_perf_new_query(perf_ctx, query_index); - if (unlikely(!obj)) - return NULL; - - struct brw_perf_query_object *brw_query = calloc(1, sizeof(struct brw_perf_query_object)); - if (unlikely(!brw_query)) { - intel_perf_delete_query(perf_ctx, obj); - return NULL; - } - - brw_query->query = obj; - return &brw_query->base; -} - -/** - * Driver hook for glDeletePerfQueryINTEL(). - */ -static void -brw_delete_perf_query(struct gl_context *ctx, - struct gl_perf_query_object *o) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_perf_query_object *brw_query = brw_perf_query(o); - struct intel_perf_query_object *obj = brw_query->query; - struct intel_perf_context *perf_ctx = brw->perf_ctx; - - /* We can assume that the frontend waits for a query to complete - * before ever calling into here, so we don't have to worry about - * deleting an in-flight query object. 
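 *
 * (Editor's sketch, not part of the original source: the lifecycle the
 * frontend enforces looks roughly like
 *
 *    GLuint handle, written;
 *    GLchar data[4096];   /* >= data_size reported for the query */
 *    glCreatePerfQueryINTEL(query_id, &handle);
 *    glBeginPerfQueryINTEL(handle);
 *    ...draw...
 *    glEndPerfQueryINTEL(handle);
 *    glGetPerfQueryDataINTEL(handle, GL_PERFQUERY_WAIT_INTEL,
 *                            sizeof(data), data, &written);
 *    glDeletePerfQueryINTEL(handle);
 *
 * so by the time this hook runs the query is guaranteed idle.)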
- */ - assert(!o->Active); - assert(!o->Used || o->Ready); - - DBG("Delete(%d)\n", o->Id); - - intel_perf_delete_query(perf_ctx, obj); - free(brw_query); -} - -/******************************************************************************/ -/* intel_device_info will have incorrect default topology values for unsupported - * kernels. Verify kernel support to ensure OA metrics are accurate. - */ -static bool -oa_metrics_kernel_support(int fd, const struct intel_device_info *devinfo) -{ - if (devinfo->ver >= 10) { - /* topology uAPI required for CNL+ (kernel 4.17+) make a call to the api - * to verify support - */ - struct drm_i915_query_item item = { - .query_id = DRM_I915_QUERY_TOPOLOGY_INFO, - }; - struct drm_i915_query query = { - .num_items = 1, - .items_ptr = (uintptr_t) &item, - }; - - /* kernel 4.17+ supports the query */ - return drmIoctl(fd, DRM_IOCTL_I915_QUERY, &query) == 0; - } - - if (devinfo->ver >= 8) { - /* 4.13+ api required for gfx8 - gfx9 */ - int mask; - struct drm_i915_getparam gp = { - .param = I915_PARAM_SLICE_MASK, - .value = &mask, - }; - /* kernel 4.13+ supports this parameter */ - return drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0; - } - - if (devinfo->ver == 7) - /* default topology values are correct for HSW */ - return true; - - /* oa not supported before gen 7*/ - return false; -} - -static void * -brw_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size) -{ - return brw_bo_alloc(bufmgr, name, size, BRW_MEMZONE_OTHER); -} - -static void -brw_oa_emit_mi_report_perf_count(void *c, - void *bo, - uint32_t offset_in_bytes, - uint32_t report_id) -{ - struct brw_context *ctx = c; - ctx->vtbl.emit_mi_report_perf_count(ctx, - bo, - offset_in_bytes, - report_id); -} - -typedef void (*bo_unreference_t)(void *); -typedef void *(*bo_map_t)(void *, void *, unsigned flags); -typedef void (*bo_unmap_t)(void *); -typedef void (* emit_mi_report_t)(void *, void *, uint32_t, uint32_t); -typedef void (*emit_mi_flush_t)(void *); - -static void -brw_oa_batchbuffer_flush(void *c, const char *file, int line) -{ - struct brw_context *ctx = c; - _brw_batch_flush_fence(ctx, -1, NULL, file, line); -} - -static void -brw_oa_emit_stall_at_pixel_scoreboard(void *c) -{ - struct brw_context *brw = c; - brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_STALL_AT_SCOREBOARD); -} - -static void -brw_perf_store_register(struct brw_context *brw, struct brw_bo *bo, - uint32_t reg, uint32_t reg_size, - uint32_t offset) -{ - if (reg_size == 8) { - brw_store_register_mem64(brw, bo, reg, offset); - } else { - assert(reg_size == 4); - brw_store_register_mem32(brw, bo, reg, offset); - } -} - -typedef void (*store_register_mem_t)(void *ctx, void *bo, - uint32_t reg, uint32_t reg_size, - uint32_t offset); -typedef bool (*batch_references_t)(void *batch, void *bo); -typedef void (*bo_wait_rendering_t)(void *bo); -typedef int (*bo_busy_t)(void *bo); - -static unsigned -brw_init_perf_query_info(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - struct intel_perf_context *perf_ctx = brw->perf_ctx; - struct intel_perf_config *perf_cfg = intel_perf_config(perf_ctx); - - if (perf_cfg) - return perf_cfg->n_queries; - - if (!oa_metrics_kernel_support(brw->screen->fd, devinfo)) - return 0; - - perf_cfg = intel_perf_new(brw->mem_ctx); - - perf_cfg->vtbl.bo_alloc = brw_oa_bo_alloc; - perf_cfg->vtbl.bo_unreference = (bo_unreference_t)brw_bo_unreference; - perf_cfg->vtbl.bo_map = (bo_map_t)brw_bo_map; - perf_cfg->vtbl.bo_unmap 
= (bo_unmap_t)brw_bo_unmap; - perf_cfg->vtbl.emit_stall_at_pixel_scoreboard = - (emit_mi_flush_t)brw_oa_emit_stall_at_pixel_scoreboard; - perf_cfg->vtbl.emit_mi_report_perf_count = - (emit_mi_report_t)brw_oa_emit_mi_report_perf_count; - perf_cfg->vtbl.batchbuffer_flush = brw_oa_batchbuffer_flush; - perf_cfg->vtbl.store_register_mem = - (store_register_mem_t) brw_perf_store_register; - perf_cfg->vtbl.batch_references = (batch_references_t)brw_batch_references; - perf_cfg->vtbl.bo_wait_rendering = (bo_wait_rendering_t)brw_bo_wait_rendering; - perf_cfg->vtbl.bo_busy = (bo_busy_t)brw_bo_busy; - - intel_perf_init_metrics(perf_cfg, devinfo, brw->screen->fd, - true /* pipeline stats */, - true /* register snapshots */); - intel_perf_init_context(perf_ctx, perf_cfg, brw->mem_ctx, brw, brw->bufmgr, - devinfo, brw->hw_ctx, brw->screen->fd); - - return perf_cfg->n_queries; -} - -void -brw_init_performance_queries(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - ctx->Driver.InitPerfQueryInfo = brw_init_perf_query_info; - ctx->Driver.GetPerfQueryInfo = brw_get_perf_query_info; - ctx->Driver.GetPerfCounterInfo = brw_get_perf_counter_info; - ctx->Driver.NewPerfQueryObject = brw_new_perf_query_object; - ctx->Driver.DeletePerfQuery = brw_delete_perf_query; - ctx->Driver.BeginPerfQuery = brw_begin_perf_query; - ctx->Driver.EndPerfQuery = brw_end_perf_query; - ctx->Driver.WaitPerfQuery = brw_wait_perf_query; - ctx->Driver.IsPerfQueryReady = brw_is_perf_query_ready; - ctx->Driver.GetPerfQueryData = brw_get_perf_query_data; -} diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c deleted file mode 100644 index 7c2cfde..0000000 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c +++ /dev/null @@ -1,454 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "brw_batch.h" -#include "brw_fbo.h" - -/** - * Emit a PIPE_CONTROL with various flushing flags. - * - * The caller is responsible for deciding what flags are appropriate for the - * given generation. 
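 *
 * (Editor's sketch, not part of the original source: a typical caller
 * composes generation-appropriate bits from brw_pipe_control.h, e.g.
 *
 *    uint32_t flags = PIPE_CONTROL_RENDER_TARGET_FLUSH;
 *    if (devinfo->ver >= 6)
 *       flags |= PIPE_CONTROL_CS_STALL |
 *                PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
 *    brw_emit_pipe_control_flush(brw, flags);
 *
 * Passing flush and invalidate bits together is safe here: as the code
 * below shows, such combinations are split into two PIPE_CONTROLs on
 * Gfx6+.)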
- */ -void -brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->ver >= 6 && - (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) && - (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) { - /* A pipe control command with flush and invalidate bits set - * simultaneously is an inherently racy operation on Gfx6+ if the - * contents of the flushed caches were intended to become visible from - * any of the invalidated caches. Split it in two PIPE_CONTROLs, the - * first one should stall the pipeline to make sure that the flushed R/W - * caches are coherent with memory once the specified R/O caches are - * invalidated. On pre-Gfx6 hardware the (implicit) R/O cache - * invalidation seems to happen at the bottom of the pipeline together - * with any write cache flush, so this shouldn't be a concern. In order - * to ensure a full stall, we do an end-of-pipe sync. - */ - brw_emit_end_of_pipe_sync(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS)); - flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL); - } - - brw->vtbl.emit_raw_pipe_control(brw, flags, NULL, 0, 0); -} - -/** - * Emit a PIPE_CONTROL that writes to a buffer object. - * - * \p flags should contain one of the following items: - * - PIPE_CONTROL_WRITE_IMMEDIATE - * - PIPE_CONTROL_WRITE_TIMESTAMP - * - PIPE_CONTROL_WRITE_DEPTH_COUNT - */ -void -brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm) -{ - brw->vtbl.emit_raw_pipe_control(brw, flags, bo, offset, imm); -} - -/** - * Restriction [DevSNB, DevIVB]: - * - * Prior to changing Depth/Stencil Buffer state (i.e. any combination of - * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER, - * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall - * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth - * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by - * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set), - * unless SW can otherwise guarantee that the pipeline from WM onwards is - * already flushed (e.g., via a preceding MI_FLUSH). - */ -void -brw_emit_depth_stall_flushes(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - assert(devinfo->ver >= 6); - - /* Starting on BDW, these pipe controls are unnecessary. - * - * WM HW will internally manage the draining pipe and flushing of the caches - * when this command is issued. The PIPE_CONTROL restrictions are removed. - */ - if (devinfo->ver >= 8) - return; - - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL); - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH); - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL); -} - -/** - * From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input): - * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth - * stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, - * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, - * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL needs - * to be sent before any combination of VS associated 3DSTATE." 
- */ -void -gfx7_emit_vs_workaround_flush(struct brw_context *brw) -{ - ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo; - - assert(devinfo->ver == 7); - brw_emit_pipe_control_write(brw, - PIPE_CONTROL_WRITE_IMMEDIATE - | PIPE_CONTROL_DEPTH_STALL, - brw->workaround_bo, - brw->workaround_bo_offset, 0); -} - -/** - * From the PRM, Volume 2a: - * - * "Indirect State Pointers Disable - * - * At the completion of the post-sync operation associated with this pipe - * control packet, the indirect state pointers in the hardware are - * considered invalid; the indirect pointers are not saved in the context. - * If any new indirect state commands are executed in the command stream - * while the pipe control is pending, the new indirect state commands are - * preserved. - * - * [DevIVB+]: Using Invalidate State Pointer (ISP) only inhibits context - * restoring of Push Constant (3DSTATE_CONSTANT_*) commands. Push Constant - * commands are only considered as Indirect State Pointers. Once ISP is - * issued in a context, SW must initialize by programming push constant - * commands for all the shaders (at least to zero length) before attempting - * any rendering operation for the same context." - * - * 3DSTATE_CONSTANT_* packets are restored during a context restore, - * even though they point to a BO that has been already unreferenced at - * the end of the previous batch buffer. This has been fine so far since - * we are protected by the scratch page (every address not covered by - * a BO should be pointing to the scratch page). But on CNL, it is - * causing a GPU hang during context restore at the 3DSTATE_CONSTANT_* - * instruction. - * - * The flag "Indirect State Pointers Disable" in PIPE_CONTROL tells the - * hardware to ignore previous 3DSTATE_CONSTANT_* packets during a - * context restore, so the mentioned hang doesn't happen. However, - * software must program push constant commands for all stages prior to - * rendering anything, so we flag them as dirty. - * - * Finally, we also make sure to stall at pixel scoreboard to make sure the - * constants have been loaded into the EUs prior to disabling the push constants - * so that it doesn't hang a previous 3DPRIMITIVE. - */ -void -gfx7_emit_isp_disable(struct brw_context *brw) -{ - brw->vtbl.emit_raw_pipe_control(brw, - PIPE_CONTROL_STALL_AT_SCOREBOARD | - PIPE_CONTROL_CS_STALL, - NULL, 0, 0); - brw->vtbl.emit_raw_pipe_control(brw, - PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE | - PIPE_CONTROL_CS_STALL, - NULL, 0, 0); - - brw->vs.base.push_constants_dirty = true; - brw->tcs.base.push_constants_dirty = true; - brw->tes.base.push_constants_dirty = true; - brw->gs.base.push_constants_dirty = true; - brw->wm.base.push_constants_dirty = true; -} - -/** - * Emit a PIPE_CONTROL command for gfx7 with the CS Stall bit set. - */ -void -gfx7_emit_cs_stall_flush(struct brw_context *brw) -{ - brw_emit_pipe_control_write(brw, - PIPE_CONTROL_CS_STALL - | PIPE_CONTROL_WRITE_IMMEDIATE, - brw->workaround_bo, - brw->workaround_bo_offset, 0); -} - -/** - * Emits a PIPE_CONTROL with a non-zero post-sync operation, for - * implementing two workarounds on gfx6. From section 1.4.7.1 - * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: - * - * [DevSNB-C+{W/A}] Before any depth stall flush (including those - * produced by non-pipelined state commands), software needs to first - * send a PIPE_CONTROL with no bits set except Post-Sync Operation != - * 0. 
- * - * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable - * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. - * - * And the workaround for these two requires this workaround first: - * - * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent - * BEFORE the pipe-control with a post-sync op and no write-cache - * flushes. - * - * And this last workaround is tricky because of the requirements on - * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM - * volume 2 part 1: - * - * "1 of the following must also be set: - * - Render Target Cache Flush Enable ([12] of DW1) - * - Depth Cache Flush Enable ([0] of DW1) - * - Stall at Pixel Scoreboard ([1] of DW1) - * - Depth Stall ([13] of DW1) - * - Post-Sync Operation ([13] of DW1) - * - Notify Enable ([8] of DW1)" - * - * The cache flushes require the workaround flush that triggered this - * one, so we can't use it. Depth stall would trigger the same. - * Post-sync nonzero is what triggered this second workaround, so we - * can't use that one either. Notify enable is IRQs, which aren't - * really our business. That leaves only stall at scoreboard. - */ -void -brw_emit_post_sync_nonzero_flush(struct brw_context *brw) -{ - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - - brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE, - brw->workaround_bo, - brw->workaround_bo_offset, 0); -} - -/* - * From Sandybridge PRM, volume 2, "1.7.2 End-of-Pipe Synchronization": - * - * Write synchronization is a special case of end-of-pipe - * synchronization that requires that the render cache and/or depth - * related caches are flushed to memory, where the data will become - * globally visible. This type of synchronization is required prior to - * SW (CPU) actually reading the result data from memory, or initiating - * an operation that will use as a read surface (such as a texture - * surface) a previous render target and/or depth/stencil buffer - * - * - * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization": - * - * Exercising the write cache flush bits (Render Target Cache Flush - * Enable, Depth Cache Flush Enable, DC Flush) in PIPE_CONTROL only - * ensures the write caches are flushed and doesn't guarantee the data - * is globally visible. - * - * SW can track the completion of the end-of-pipe-synchronization by - * using "Notify Enable" and "PostSync Operation - Write Immediate - * Data" in the PIPE_CONTROL command. - */ -void -brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->ver >= 6) { - /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory": - * - * "The most common action to perform upon reaching a synchronization - * point is to write a value out to memory. An immediate value - * (included with the synchronization command) may be written." - * - * - * From Broadwell PRM, volume 7, "End-of-Pipe Synchronization": - * - * "In case the data flushed out by the render engine is to be read - * back in to the render engine in coherent manner, then the render - * engine has to wait for the fence completion before accessing the - * flushed data. This can be achieved by following means on various - * products: PIPE_CONTROL command with CS Stall and the required - * write caches flushed with Post-Sync-Operation as Write Immediate - * Data. 
* - * Example: - * - Workload-1 (3D/GPGPU/MEDIA) - * - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write Immediate - * Data, Required Write Cache Flush bits set) - * - Workload-2 (Can use the data produce or output by Workload-1) - */ - brw_emit_pipe_control_write(brw, - flags | PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_WRITE_IMMEDIATE, - brw->workaround_bo, - brw->workaround_bo_offset, 0); - - if (devinfo->platform == INTEL_PLATFORM_HSW) { - /* Haswell needs additional workarounds: - * - * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization": - * - * Option 1: - * PIPE_CONTROL command with the CS Stall and the required write - * caches flushed with Post-SyncOperation as Write Immediate Data - * followed by eight dummy MI_STORE_DATA_IMM (write to scratch - * space) commands. - * - * Example: - * - Workload-1 - * - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write - * Immediate Data, Required Write Cache Flush bits set) - * - MI_STORE_DATA_IMM (8 times) (Dummy data, Scratch Address) - * - Workload-2 (Can use the data produce or output by - * Workload-1) - * - * Unfortunately, both the PRMs and the internal docs are a bit - * out-of-date in this regard. What the Windows driver does (and - * this appears to actually work) is to emit a register read from the - * memory address written by the pipe control above. - * - * What register we load into doesn't matter. We choose an indirect - * rendering register because we know it always exists and it's one - * of the first registers the command parser allows us to write. If - * you don't have command parser support in your kernel (pre-4.2), - * this will get turned into MI_NOOP and you won't get the - * workaround. Unfortunately, there's just not much we can do in - * that case. This register is perfectly safe to write since we - * always re-load all of the indirect draw registers right before - * 3DPRIMITIVE when needed anyway. - */ - brw_load_register_mem(brw, GFX7_3DPRIM_START_INSTANCE, - brw->workaround_bo, brw->workaround_bo_offset); - } - } else { - /* On gfx4-5, a regular pipe control seems to suffice. */ - brw_emit_pipe_control_flush(brw, flags); - } -} - -/* Emit a pipelined flush to either flush render and texture cache for - * reading from a FBO-drawn texture, or flush so that frontbuffer - * render appears on the screen in DRI1. - * - * This is also used for the always_flush_cache driconf debug option. 
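 *
 * (Editor's note, not part of the original source: the exact driconf
 * stanza below is illustrative, but enabling the option looks roughly
 * like
 *
 *    <driconf>
 *      <device driver="i965">
 *        <application name="some app" executable="app">
 *          <option name="always_flush_cache" value="true" />
 *        </application>
 *      </device>
 *    </driconf>
 *
 * With it enabled the driver emits this flush after every draw call,
 * trading a large slowdown for easier isolation of caching bugs.)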
- */ -void -brw_emit_mi_flush(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH; - if (devinfo->ver >= 6) { - flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE | - PIPE_CONTROL_CONST_CACHE_INVALIDATE | - PIPE_CONTROL_DATA_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_VF_CACHE_INVALIDATE | - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_CS_STALL; - } - brw_emit_pipe_control_flush(brw, flags); -} - -static bool -init_identifier_bo(struct brw_context *brw) -{ - void *bo_map; - - if (!can_do_exec_capture(brw->screen)) - return true; - - bo_map = brw_bo_map(NULL, brw->workaround_bo, MAP_READ | MAP_WRITE); - if (!bo_map) - return false; - - brw->workaround_bo->kflags |= EXEC_OBJECT_CAPTURE; - brw->workaround_bo_offset = - ALIGN(intel_debug_write_identifiers(bo_map, 4096, "i965") + 8, 8); - - brw_bo_unmap(brw->workaround_bo); - - return true; -} - -int -brw_init_pipe_control(struct brw_context *brw, - const struct intel_device_info *devinfo) -{ - switch (devinfo->ver) { - case 11: - brw->vtbl.emit_raw_pipe_control = gfx11_emit_raw_pipe_control; - break; - case 9: - brw->vtbl.emit_raw_pipe_control = gfx9_emit_raw_pipe_control; - break; - case 8: - brw->vtbl.emit_raw_pipe_control = gfx8_emit_raw_pipe_control; - break; - case 7: - brw->vtbl.emit_raw_pipe_control = - devinfo->verx10 == 75 ? - gfx75_emit_raw_pipe_control : gfx7_emit_raw_pipe_control; - break; - case 6: - brw->vtbl.emit_raw_pipe_control = gfx6_emit_raw_pipe_control; - break; - case 5: - brw->vtbl.emit_raw_pipe_control = gfx5_emit_raw_pipe_control; - break; - case 4: - brw->vtbl.emit_raw_pipe_control = - devinfo->verx10 == 45 ? - gfx45_emit_raw_pipe_control : gfx4_emit_raw_pipe_control; - break; - default: - unreachable("Unhandled Gen."); - } - - if (devinfo->ver < 6) - return 0; - - /* We can't just use brw_state_batch to get a chunk of space for - * the gfx6 workaround because it involves actually writing to - * the buffer, and the kernel doesn't let us write to the batch. - */ - brw->workaround_bo = brw_bo_alloc(brw->bufmgr, "workaround", 4096, - BRW_MEMZONE_OTHER); - if (brw->workaround_bo == NULL) - return -ENOMEM; - - if (!init_identifier_bo(brw)) - return -ENOMEM; /* Couldn't map workaround_bo?? */ - - brw->workaround_bo_offset = 0; - brw->pipe_controls_since_last_cs_stall = 0; - - return 0; -} - -void -brw_fini_pipe_control(struct brw_context *brw) -{ - brw_bo_unreference(brw->workaround_bo); -} diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.h b/src/mesa/drivers/dri/i965/brw_pipe_control.h deleted file mode 100644 index 1aed53e..0000000 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef BRW_PIPE_CONTROL_DOT_H -#define BRW_PIPE_CONTROL_DOT_H - -struct brw_context; -struct intel_device_info; -struct brw_bo; - -/** @{ - * - * PIPE_CONTROL operation, a combination of MI_FLUSH and a register write with - * additional flushing control. - * - * The bits here are not the actual hardware values. The actual values - * shift around a bit per-generation, so we just have flags for each - * potential operation, and use genxml to encode the actual packet. - */ -enum pipe_control_flags -{ - PIPE_CONTROL_FLUSH_LLC = (1 << 1), - PIPE_CONTROL_LRI_POST_SYNC_OP = (1 << 2), - PIPE_CONTROL_STORE_DATA_INDEX = (1 << 3), - PIPE_CONTROL_CS_STALL = (1 << 4), - PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET = (1 << 5), - PIPE_CONTROL_SYNC_GFDT = (1 << 6), - PIPE_CONTROL_TLB_INVALIDATE = (1 << 7), - PIPE_CONTROL_MEDIA_STATE_CLEAR = (1 << 8), - PIPE_CONTROL_WRITE_IMMEDIATE = (1 << 9), - PIPE_CONTROL_WRITE_DEPTH_COUNT = (1 << 10), - PIPE_CONTROL_WRITE_TIMESTAMP = (1 << 11), - PIPE_CONTROL_DEPTH_STALL = (1 << 12), - PIPE_CONTROL_RENDER_TARGET_FLUSH = (1 << 13), - PIPE_CONTROL_INSTRUCTION_INVALIDATE = (1 << 14), - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE = (1 << 15), - PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16), - PIPE_CONTROL_NOTIFY_ENABLE = (1 << 17), - PIPE_CONTROL_FLUSH_ENABLE = (1 << 18), - PIPE_CONTROL_DATA_CACHE_FLUSH = (1 << 19), - PIPE_CONTROL_VF_CACHE_INVALIDATE = (1 << 20), - PIPE_CONTROL_CONST_CACHE_INVALIDATE = (1 << 21), - PIPE_CONTROL_STATE_CACHE_INVALIDATE = (1 << 22), - PIPE_CONTROL_STALL_AT_SCOREBOARD = (1 << 23), - PIPE_CONTROL_DEPTH_CACHE_FLUSH = (1 << 24), -}; - -#define PIPE_CONTROL_CACHE_FLUSH_BITS \ - (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \ - PIPE_CONTROL_RENDER_TARGET_FLUSH) - -#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \ - (PIPE_CONTROL_STATE_CACHE_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE | \ - PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \ - PIPE_CONTROL_INSTRUCTION_INVALIDATE) - -/** @} */ - -int brw_init_pipe_control(struct brw_context *brw, - const struct intel_device_info *info); -void brw_fini_pipe_control(struct brw_context *brw); - -void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags); -void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags); -void brw_emit_mi_flush(struct brw_context *brw); -void brw_emit_post_sync_nonzero_flush(struct brw_context *brw); -void brw_emit_depth_stall_flushes(struct brw_context *brw); -void gfx7_emit_vs_workaround_flush(struct brw_context *brw); -void gfx7_emit_cs_stall_flush(struct brw_context *brw); -void gfx7_emit_isp_disable(struct brw_context *brw); - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_pixel.c b/src/mesa/drivers/dri/i965/brw_pixel.c deleted file mode 100644 index b6a2c51..0000000 --- a/src/mesa/drivers/dri/i965/brw_pixel.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. 
- * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "main/accum.h" -#include "main/enums.h" -#include "main/state.h" -#include "main/stencil.h" -#include "main/bufferobj.h" -#include "main/context.h" -#include "swrast/swrast.h" - -#include "brw_context.h" -#include "brw_pixel.h" - -#define FILE_DEBUG_FLAG DEBUG_PIXEL - -static GLenum -effective_func(GLenum func, bool src_alpha_is_one) -{ - if (src_alpha_is_one) { - if (func == GL_SRC_ALPHA) - return GL_ONE; - if (func == GL_ONE_MINUS_SRC_ALPHA) - return GL_ZERO; - } - - return func; -} - -/** - * Check if any fragment operations are in effect which might affect - * glDraw/CopyPixels. - */ -bool -brw_check_blit_fragment_ops(struct gl_context * ctx, bool src_alpha_is_one) -{ - if (ctx->NewState) - _mesa_update_state(ctx); - - if (_mesa_arb_fragment_program_enabled(ctx)) { - DBG("fallback due to fragment program\n"); - return false; - } - - if (ctx->Color.BlendEnabled && - (effective_func(ctx->Color.Blend[0].SrcRGB, src_alpha_is_one) != GL_ONE || - effective_func(ctx->Color.Blend[0].DstRGB, src_alpha_is_one) != GL_ZERO || - ctx->Color.Blend[0].EquationRGB != GL_FUNC_ADD || - effective_func(ctx->Color.Blend[0].SrcA, src_alpha_is_one) != GL_ONE || - effective_func(ctx->Color.Blend[0].DstA, src_alpha_is_one) != GL_ZERO || - ctx->Color.Blend[0].EquationA != GL_FUNC_ADD)) { - DBG("fallback due to blend\n"); - return false; - } - - if (ctx->Texture._MaxEnabledTexImageUnit != -1) { - DBG("fallback due to texturing\n"); - return false; - } - - if (GET_COLORMASK(ctx->Color.ColorMask, 0) != 0xf) { - DBG("fallback due to color masking\n"); - return false; - } - - if (ctx->Color.AlphaEnabled) { - DBG("fallback due to alpha\n"); - return false; - } - - if (ctx->Depth.Test) { - DBG("fallback due to depth test\n"); - return false; - } - - if (ctx->Fog.Enabled) { - DBG("fallback due to fog\n"); - return false; - } - - if (ctx->_ImageTransferState) { - DBG("fallback due to image transfer\n"); - return false; - } - - if (_mesa_stencil_is_enabled(ctx)) { - DBG("fallback due to image stencil\n"); - return false; - } - - if (ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) { - DBG("fallback due to pixel zoom\n"); - return false; - } - - if (ctx->RenderMode != GL_RENDER) { - DBG("fallback due to render mode\n"); - return false; - } - - return true; -} - -void -brw_init_pixel_functions(struct dd_function_table *functions) -{ - functions->Bitmap = brw_bitmap; - 
functions->CopyPixels = brw_copypixels; - functions->DrawPixels = brw_drawpixels; - functions->ReadPixels = brw_readpixels; -} diff --git a/src/mesa/drivers/dri/i965/brw_pixel.h b/src/mesa/drivers/dri/i965/brw_pixel.h deleted file mode 100644 index b6e3e6e..0000000 --- a/src/mesa/drivers/dri/i965/brw_pixel.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef BRW_PIXEL_H -#define BRW_PIXEL_H - -#include "main/mtypes.h" - -void brw_init_pixel_functions(struct dd_function_table *functions); -bool brw_check_blit_fragment_ops(struct gl_context *ctx, - bool src_alpha_is_one); - -void brw_readpixels(struct gl_context *ctx, - GLint x, GLint y, - GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *pack, - GLvoid *pixels); - -void brw_drawpixels(struct gl_context *ctx, - GLint x, GLint y, - GLsizei width, GLsizei height, - GLenum format, - GLenum type, - const struct gl_pixelstore_attrib *unpack, - const GLvoid *pixels); - -void brw_copypixels(struct gl_context *ctx, - GLint srcx, GLint srcy, - GLsizei width, GLsizei height, - GLint destx, GLint desty, GLenum type); - -void brw_bitmap(struct gl_context *ctx, - GLint x, GLint y, - GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte *pixels); - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_pixel_bitmap.c b/src/mesa/drivers/dri/i965/brw_pixel_bitmap.c deleted file mode 100644 index aa8c2fc..0000000 --- a/src/mesa/drivers/dri/i965/brw_pixel_bitmap.c +++ /dev/null @@ -1,363 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "main/blend.h" -#include "main/enums.h" -#include "main/image.h" -#include "main/colormac.h" -#include "main/condrender.h" -#include "main/mtypes.h" -#include "main/macros.h" -#include "main/pbo.h" -#include "main/bufferobj.h" -#include "main/state.h" -#include "main/texobj.h" -#include "main/context.h" -#include "main/fbobject.h" -#include "swrast/swrast.h" -#include "drivers/common/meta.h" - -#include "brw_context.h" -#include "brw_screen.h" -#include "brw_batch.h" -#include "brw_blit.h" -#include "brw_fbo.h" -#include "brw_image.h" -#include "brw_buffers.h" -#include "brw_pixel.h" - - -#define FILE_DEBUG_FLAG DEBUG_PIXEL - - -/* Unlike the other intel_pixel_* functions, the expectation here is - * that the incoming data is not in a PBO. With the XY_TEXT blit - * method, there's no benefit having it in a PBO, but we could - * implement a path based on XY_MONO_SRC_COPY_BLIT which might benefit - * PBO bitmaps. I think they are probably pretty rare though - I - * wonder if Xgl uses them? - */ -static const GLubyte * -map_pbo(struct gl_context *ctx, - GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte *bitmap) -{ - GLubyte *buf; - - if (!_mesa_validate_pbo_access(2, unpack, width, height, 1, - GL_COLOR_INDEX, GL_BITMAP, - INT_MAX, (const GLvoid *) bitmap)) { - _mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)"); - return NULL; - } - - buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size, - GL_MAP_READ_BIT, - unpack->BufferObj, - MAP_INTERNAL); - if (!buf) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)"); - return NULL; - } - - return ADD_POINTERS(buf, bitmap); -} - -static bool test_bit( const GLubyte *src, GLuint bit ) -{ - return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0; -} - -static void set_bit( GLubyte *dest, GLuint bit ) -{ - dest[bit/8] |= 1 << (bit % 8); -} - -/* Extract a rectangle's worth of data from the bitmap. Called - * per chunk of HW-sized bitmap. - */ -static GLuint -get_bitmap_rect(GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte *bitmap, - GLuint x, GLuint y, - GLuint w, GLuint h, - GLubyte *dest, - GLuint row_align, - bool invert) -{ - GLuint src_offset = (x + unpack->SkipPixels) & 0x7; - GLuint mask = unpack->LsbFirst ? 0 : 7; - GLuint bit = 0; - GLint row, col; - GLint first, last; - GLint incr; - GLuint count = 0; - - DBG("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n", - __func__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask); - - if (invert) { - first = h-1; - last = 0; - incr = -1; - } - else { - first = 0; - last = h-1; - incr = 1; - } - - /* Require that dest be pre-zero'd. 
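 *
 * (Editor's note, not part of the original source: the XOR masks used in
 * the loop below convert between GL's unpack bit order and the MSB-first
 * order the blitter consumes. With default unpack state, LsbFirst is
 * false and mask == 7, so a source bit is fetched as
 *
 *    test_bit(rowsrc, (col + src_offset) ^ 7)   // read MSB-first
 *
 * and set_bit(dest, bit ^ 7) mirrors each destination byte the same
 * way.)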
- */ - for (row = first; row != (last+incr); row += incr) { - const GLubyte *rowsrc = _mesa_image_address2d(unpack, bitmap, - width, height, - GL_COLOR_INDEX, GL_BITMAP, - y + row, x); - - for (col = 0; col < w; col++, bit++) { - if (test_bit(rowsrc, (col + src_offset) ^ mask)) { - set_bit(dest, bit ^ 7); - count++; - } - } - - if (row_align) - bit = ALIGN(bit, row_align); - } - - return count; -} - -/** - * Returns the low Y value of the vertical range given, flipped according to - * whether the framebuffer is flipped or not. - */ -static inline int -y_flip(struct gl_framebuffer *fb, int y, int height) -{ - if (fb->FlipY) - return fb->Height - y - height; - else - return y; -} - -/* - * Render a bitmap. - */ -static bool -do_blit_bitmap(struct gl_context *ctx, - GLint dstx, GLint dsty, - GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte *bitmap) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_framebuffer *fb = ctx->DrawBuffer; - struct brw_renderbuffer *irb; - GLfloat tmpColor[4]; - GLubyte ubcolor[4]; - GLuint color; - GLsizei bitmap_width = width; - GLsizei bitmap_height = height; - GLint px, py; - GLuint stipple[32]; - GLint orig_dstx = dstx; - GLint orig_dsty = dsty; - - /* Update draw buffer bounds */ - _mesa_update_state(ctx); - - if (ctx->Depth.Test) { - /* The blit path produces incorrect results when depth testing is on. - * It seems the blit Z coord is always 1.0 (the far plane) so fragments - * will likely be obscured by other, closer geometry. - */ - return false; - } - - brw_prepare_render(brw); - - if (fb->_NumColorDrawBuffers != 1) { - perf_debug("accelerated glBitmap() only supports rendering to a " - "single color buffer\n"); - return false; - } - - irb = brw_renderbuffer(fb->_ColorDrawBuffers[0]); - - if (unpack->BufferObj) { - bitmap = map_pbo(ctx, width, height, unpack, bitmap); - if (bitmap == NULL) - return true; /* even though this is an error, we're done */ - } - - COPY_4V(tmpColor, ctx->Current.RasterColor); - - if (_mesa_need_secondary_color(ctx)) { - ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor); - } - - UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[0], tmpColor[0]); - UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[1], tmpColor[1]); - UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]); - UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]); - - switch (_mesa_get_render_format(ctx, brw_rb_format(irb))) { - case MESA_FORMAT_B8G8R8A8_UNORM: - case MESA_FORMAT_B8G8R8X8_UNORM: - color = PACK_COLOR_8888(ubcolor[3], ubcolor[0], ubcolor[1], ubcolor[2]); - break; - case MESA_FORMAT_B5G6R5_UNORM: - color = PACK_COLOR_565(ubcolor[0], ubcolor[1], ubcolor[2]); - break; - default: - perf_debug("Unsupported format %s in accelerated glBitmap()\n", - _mesa_get_format_name(irb->mt->format)); - return false; - } - - if (!brw_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F)) - return false; - - /* Clip to buffer bounds and scissor. */ - if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, - fb->_Xmax, fb->_Ymax, - &dstx, &dsty, &width, &height)) - goto out; - - dsty = y_flip(fb, dsty, height); - -#define DY 32 -#define DX 32 - - /* The blitter has no idea about fast color clears, so we need to resolve - * the miptree before we do anything. 
- */ - brw_miptree_access_raw(brw, irb->mt, irb->mt_level, irb->mt_layer, true); - - /* Chop it all into chunks that can be digested by hardware: */ - for (py = 0; py < height; py += DY) { - for (px = 0; px < width; px += DX) { - int h = MIN2(DY, height - py); - int w = MIN2(DX, width - px); - GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8; - const enum gl_logicop_mode logic_op = ctx->Color.ColorLogicOpEnabled ? - ctx->Color._LogicOp : COLOR_LOGICOP_COPY; - - assert(sz <= sizeof(stipple)); - memset(stipple, 0, sz); - - /* May need to adjust this when padding has been introduced in - * sz above: - * - * Have to translate destination coordinates back into source - * coordinates. - */ - int count = get_bitmap_rect(bitmap_width, bitmap_height, unpack, - bitmap, - -orig_dstx + (dstx + px), - -orig_dsty + y_flip(fb, dsty + py, h), - w, h, - (GLubyte *)stipple, - 8, - fb->FlipY); - if (count == 0) - continue; - - if (!brw_emit_immediate_color_expand_blit(brw, - irb->mt->cpp, - (GLubyte *)stipple, - sz, - color, - irb->mt->surf.row_pitch_B, - irb->mt->bo, - irb->mt->offset, - irb->mt->surf.tiling, - dstx + px, - dsty + py, - w, h, - logic_op)) { - return false; - } - - if (ctx->Query.CurrentOcclusionObject) - ctx->Query.CurrentOcclusionObject->Result += count; - } - } -out: - - if (INTEL_DEBUG(DEBUG_SYNC)) - brw_batch_flush(brw); - - if (unpack->BufferObj) { - /* done with PBO so unmap it now */ - ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj, MAP_INTERNAL); - } - - return true; -} - - -/* There are a large number of possible ways to implement bitmap on - * this hardware, most of them have some sort of drawback. Here are a - * few that spring to mind: - * - * Blit: - * - XY_MONO_SRC_BLT_CMD - * - use XY_SETUP_CLIP_BLT for cliprect clipping. - * - XY_TEXT_BLT - * - XY_TEXT_IMMEDIATE_BLT - * - blit per cliprect, subject to maximum immediate data size. - * - XY_COLOR_BLT - * - per pixel or run of pixels - * - XY_PIXEL_BLT - * - good for sparse bitmaps - * - * 3D engine: - * - Point per pixel - * - Translate bitmap to an alpha texture and render as a quad - * - Chop bitmap up into 32x32 squares and render w/polygon stipple. - */ -void -brw_bitmap(struct gl_context * ctx, - GLint x, GLint y, - GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte * pixels) -{ - struct brw_context *brw = brw_context(ctx); - - if (!_mesa_check_conditional_render(ctx)) - return; - - if (brw->screen->devinfo.ver < 6 && - do_blit_bitmap(ctx, x, y, width, height, unpack, pixels)) - return; - - _mesa_meta_Bitmap(ctx, x, y, width, height, unpack, pixels); -} diff --git a/src/mesa/drivers/dri/i965/brw_pixel_copy.c b/src/mesa/drivers/dri/i965/brw_pixel_copy.c deleted file mode 100644 index 5527ffb..0000000 --- a/src/mesa/drivers/dri/i965/brw_pixel_copy.c +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "main/image.h" -#include "main/state.h" -#include "main/stencil.h" -#include "main/mtypes.h" -#include "main/condrender.h" -#include "main/fbobject.h" -#include "drivers/common/meta.h" - -#include "brw_context.h" -#include "brw_buffers.h" -#include "brw_mipmap_tree.h" -#include "brw_pixel.h" -#include "brw_fbo.h" -#include "brw_blit.h" -#include "brw_batch.h" - -#define FILE_DEBUG_FLAG DEBUG_PIXEL - -/** - * CopyPixels with the blitter. Don't support zooming, pixel transfer, etc. - */ -static bool -do_blit_copypixels(struct gl_context * ctx, - GLint srcx, GLint srcy, - GLsizei width, GLsizei height, - GLint dstx, GLint dsty, GLenum type) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_framebuffer *fb = ctx->DrawBuffer; - struct gl_framebuffer *read_fb = ctx->ReadBuffer; - GLint orig_dstx; - GLint orig_dsty; - GLint orig_srcx; - GLint orig_srcy; - struct brw_renderbuffer *draw_irb = NULL; - struct brw_renderbuffer *read_irb = NULL; - - /* Update draw buffer bounds */ - _mesa_update_state(ctx); - - brw_prepare_render(brw); - - switch (type) { - case GL_COLOR: - if (fb->_NumColorDrawBuffers != 1) { - perf_debug("glCopyPixels() fallback: MRT\n"); - return false; - } - - draw_irb = brw_renderbuffer(fb->_ColorDrawBuffers[0]); - read_irb = brw_renderbuffer(read_fb->_ColorReadBuffer); - break; - case GL_DEPTH_STENCIL_EXT: - draw_irb = brw_renderbuffer(fb->Attachment[BUFFER_DEPTH].Renderbuffer); - read_irb = - brw_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer); - break; - case GL_DEPTH: - perf_debug("glCopyPixels() fallback: GL_DEPTH\n"); - return false; - case GL_STENCIL: - perf_debug("glCopyPixels() fallback: GL_STENCIL\n"); - return false; - default: - perf_debug("glCopyPixels(): Unknown type\n"); - return false; - } - - if (!draw_irb) { - perf_debug("glCopyPixels() fallback: missing draw buffer\n"); - return false; - } - - if (!read_irb) { - perf_debug("glCopyPixels() fallback: missing read buffer\n"); - return false; - } - - if (draw_irb->mt->surf.samples > 1 || read_irb->mt->surf.samples > 1) { - perf_debug("glCopyPixels() fallback: multisampled buffers\n"); - return false; - } - - if (ctx->_ImageTransferState) { - perf_debug("glCopyPixels(): Unsupported image transfer state\n"); - return false; - } - - if (ctx->Depth.Test) { - perf_debug("glCopyPixels(): Unsupported depth test state\n"); - return false; - } - - if (brw->stencil_enabled) { - perf_debug("glCopyPixels(): Unsupported stencil test state\n"); - return false; - } - - if (ctx->Fog.Enabled || - ctx->Texture._MaxEnabledTexImageUnit != -1 || - _mesa_arb_fragment_program_enabled(ctx)) { - perf_debug("glCopyPixels(): Unsupported fragment shader state\n"); - return false; - } - - if (ctx->Color.AlphaEnabled || - ctx->Color.BlendEnabled) { - perf_debug("glCopyPixels(): Unsupported blend state\n"); - return false; - } - - if (GET_COLORMASK(ctx->Color.ColorMask, 0) != 0xf) { - perf_debug("glCopyPixels(): Unsupported color mask state\n"); - return false; - } - - if (ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) { 
- perf_debug("glCopyPixels(): Unsupported pixel zoom\n"); - return false; - } - - brw_batch_flush(brw); - - /* Clip to destination buffer. */ - orig_dstx = dstx; - orig_dsty = dsty; - if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, - fb->_Xmax, fb->_Ymax, - &dstx, &dsty, &width, &height)) - goto out; - /* Adjust src coords for our post-clipped destination origin */ - srcx += dstx - orig_dstx; - srcy += dsty - orig_dsty; - - /* Clip to source buffer. */ - orig_srcx = srcx; - orig_srcy = srcy; - if (!_mesa_clip_to_region(0, 0, - read_fb->Width, read_fb->Height, - &srcx, &srcy, &width, &height)) - goto out; - /* Adjust dst coords for our post-clipped source origin */ - dstx += srcx - orig_srcx; - dsty += srcy - orig_srcy; - - if (!brw_miptree_blit(brw, - read_irb->mt, read_irb->mt_level, read_irb->mt_layer, - srcx, srcy, read_fb->FlipY, - draw_irb->mt, draw_irb->mt_level, draw_irb->mt_layer, - dstx, dsty, fb->FlipY, - width, height, - (ctx->Color.ColorLogicOpEnabled ? - ctx->Color._LogicOp : COLOR_LOGICOP_COPY))) { - DBG("%s: blit failure\n", __func__); - return false; - } - - if (ctx->Query.CurrentOcclusionObject) - ctx->Query.CurrentOcclusionObject->Result += width * height; - -out: - - DBG("%s: success\n", __func__); - return true; -} - - -void -brw_copypixels(struct gl_context *ctx, - GLint srcx, GLint srcy, - GLsizei width, GLsizei height, - GLint destx, GLint desty, GLenum type) -{ - struct brw_context *brw = brw_context(ctx); - - DBG("%s\n", __func__); - - if (!_mesa_check_conditional_render(ctx)) - return; - - if (brw->screen->devinfo.ver < 6 && - do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) - return; - - /* this will use swrast if needed */ - _mesa_meta_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type); -} diff --git a/src/mesa/drivers/dri/i965/brw_pixel_draw.c b/src/mesa/drivers/dri/i965/brw_pixel_draw.c deleted file mode 100644 index aaf81f4..0000000 --- a/src/mesa/drivers/dri/i965/brw_pixel_draw.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portionsalloc - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "main/enums.h" -#include "main/image.h" -#include "main/glformats.h" -#include "main/mtypes.h" -#include "main/condrender.h" -#include "main/fbobject.h" -#include "main/teximage.h" -#include "main/texobj.h" -#include "main/texstate.h" -#include "main/bufferobj.h" -#include "swrast/swrast.h" -#include "drivers/common/meta.h" - -#include "brw_context.h" -#include "brw_screen.h" -#include "brw_blit.h" -#include "brw_buffers.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" -#include "brw_pixel.h" -#include "brw_buffer_objects.h" - -#define FILE_DEBUG_FLAG DEBUG_PIXEL - -static bool -do_blit_drawpixels(struct gl_context * ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *unpack, - const GLvoid * pixels) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_buffer_object *src = brw_buffer_object(unpack->BufferObj); - GLuint src_offset; - struct brw_bo *src_buffer; - - DBG("%s\n", __func__); - - if (!brw_check_blit_fragment_ops(ctx, false)) - return false; - - if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) { - DBG("%s: fallback due to MRT\n", __func__); - return false; - } - - brw_prepare_render(brw); - - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0]; - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - - mesa_format src_format = _mesa_format_from_format_and_type(format, type); - if (_mesa_format_is_mesa_array_format(src_format)) - src_format = _mesa_format_from_array_format(src_format); - mesa_format dst_format = irb->mt->format; - - /* We can safely discard sRGB encode/decode for the DrawPixels interface */ - src_format = _mesa_get_srgb_format_linear(src_format); - dst_format = _mesa_get_srgb_format_linear(dst_format); - - if (!brw_miptree_blit_compatible_formats(src_format, dst_format)) { - DBG("%s: bad format for blit\n", __func__); - return false; - } - - if (unpack->SwapBytes || unpack->LsbFirst || - unpack->SkipPixels || unpack->SkipRows) { - DBG("%s: bad packing params\n", __func__); - return false; - } - - int src_stride = _mesa_image_row_stride(unpack, width, format, type); - bool src_flip = false; - /* Mesa flips the src_stride for unpack->Invert, but we want our mt to have - * a normal src_stride. 
- */ - if (unpack->Invert) { - src_stride = -src_stride; - src_flip = true; - } - - src_offset = (GLintptr)pixels; - src_offset += _mesa_image_offset(2, unpack, width, height, - format, type, 0, 0, 0); - - src_buffer = brw_bufferobj_buffer(brw, src, src_offset, - height * src_stride, false); - - struct brw_mipmap_tree *pbo_mt = - brw_miptree_create_for_bo(brw, - src_buffer, - irb->mt->format, - src_offset, - width, height, 1, - src_stride, - ISL_TILING_LINEAR, - MIPTREE_CREATE_DEFAULT); - if (!pbo_mt) - return false; - - if (!brw_miptree_blit(brw, - pbo_mt, 0, 0, - 0, 0, src_flip, - irb->mt, irb->mt_level, irb->mt_layer, - x, y, ctx->DrawBuffer->FlipY, - width, height, COLOR_LOGICOP_COPY)) { - DBG("%s: blit failed\n", __func__); - brw_miptree_release(&pbo_mt); - return false; - } - - brw_miptree_release(&pbo_mt); - - if (ctx->Query.CurrentOcclusionObject) - ctx->Query.CurrentOcclusionObject->Result += width * height; - - DBG("%s: success\n", __func__); - return true; -} - -void -brw_drawpixels(struct gl_context *ctx, - GLint x, GLint y, - GLsizei width, GLsizei height, - GLenum format, - GLenum type, - const struct gl_pixelstore_attrib *unpack, - const GLvoid *pixels) -{ - struct brw_context *brw = brw_context(ctx); - - if (!_mesa_check_conditional_render(ctx)) - return; - - if (format == GL_STENCIL_INDEX) { - _swrast_DrawPixels(ctx, x, y, width, height, format, type, - unpack, pixels); - return; - } - - if (brw->screen->devinfo.ver < 6 && - unpack->BufferObj) { - if (do_blit_drawpixels(ctx, x, y, width, height, format, type, unpack, - pixels)) { - return; - } - - perf_debug("%s: fallback to generic code in PBO case\n", __func__); - } - - _mesa_meta_DrawPixels(ctx, x, y, width, height, format, type, - unpack, pixels); -} diff --git a/src/mesa/drivers/dri/i965/brw_pixel_read.c b/src/mesa/drivers/dri/i965/brw_pixel_read.c deleted file mode 100644 index ad0ee97..0000000 --- a/src/mesa/drivers/dri/i965/brw_pixel_read.c +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "main/enums.h" -#include "main/mtypes.h" -#include "main/macros.h" -#include "main/fbobject.h" -#include "main/image.h" -#include "main/bufferobj.h" -#include "main/readpix.h" -#include "main/state.h" -#include "main/glformats.h" -#include "program/prog_instruction.h" -#include "drivers/common/meta.h" - -#include "brw_context.h" -#include "brw_blorp.h" -#include "brw_screen.h" -#include "brw_batch.h" -#include "brw_buffers.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" -#include "brw_pixel.h" -#include "brw_buffer_objects.h" - -#define FILE_DEBUG_FLAG DEBUG_PIXEL - -/** - * \brief A fast path for glReadPixels - * - * This fast path is taken when the source format is BGRA, RGBA, - * A or L and when the texture memory is X- or Y-tiled. It downloads - * the source data by directly mapping the memory without a GTT fence. - * This then needs to be de-tiled on the CPU before presenting the data to - * the user in the linear fasion. - * - * This is a performance win over the conventional texture download path. - * In the conventional texture download path, the texture is either mapped - * through the GTT or copied to a linear buffer with the blitter before - * handing off to a software path. This allows us to avoid round-tripping - * through the GPU (in the case where we would be blitting) and do only a - * single copy operation. - */ -static bool -brw_readpixels_tiled_memcpy(struct gl_context *ctx, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, GLenum type, - GLvoid * pixels, - const struct gl_pixelstore_attrib *pack) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* This path supports reading from color buffers only */ - if (rb == NULL) - return false; - - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - int dst_pitch; - - /* The miptree's buffer. */ - struct brw_bo *bo; - - uint32_t cpp; - isl_memcpy_type copy_type; - - /* This fastpath is restricted to specific renderbuffer types: - * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support - * more types. - */ - if (!devinfo->has_llc || - !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) || - pixels == NULL || - pack->BufferObj || - pack->Alignment > 4 || - pack->SkipPixels > 0 || - pack->SkipRows > 0 || - (pack->RowLength != 0 && pack->RowLength != width) || - pack->SwapBytes || - pack->LsbFirst || - pack->Invert) - return false; - - /* Only a simple blit, no scale, bias or other mapping. */ - if (ctx->_ImageTransferState) - return false; - - /* It is possible that the renderbuffer (or underlying texture) is - * multisampled. Since ReadPixels from a multisampled buffer requires a - * multisample resolve, we can't handle this here - */ - if (rb->NumSamples > 1) - return false; - - /* We can't handle copying from RGBX or BGRX because the tiled_memcpy - * function doesn't set the last channel to 1. Note this checks BaseFormat - * rather than TexFormat in case the RGBX format is being simulated with an - * RGBA format. - */ - if (rb->_BaseFormat == GL_RGB) - return false; - - copy_type = brw_miptree_get_memcpy_type(rb->Format, format, type, &cpp); - if (copy_type == ISL_MEMCPY_INVALID) - return false; - - if (!irb->mt || - (irb->mt->surf.tiling != ISL_TILING_X && - irb->mt->surf.tiling != ISL_TILING_Y0)) { - /* The algorithm is written only for X- or Y-tiled memory. 
*/ - return false; - } - - /* tiled_to_linear() assumes that if the object is swizzled, it is using - * I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y. This is only - * true on gfx5 and above. - * - * The killer on top is that some gfx4 have an L-shaped swizzle mode, where - * parts of the memory aren't swizzled at all. Userspace just can't handle - * that. - */ - if (devinfo->ver < 5 && devinfo->has_bit6_swizzle) - return false; - - /* Since we are going to read raw data to the miptree, we need to resolve - * any pending fast color clears before we start. - */ - brw_miptree_access_raw(brw, irb->mt, irb->mt_level, irb->mt_layer, false); - - bo = irb->mt->bo; - - if (brw_batch_references(&brw->batch, bo)) { - perf_debug("Flushing before mapping a referenced bo.\n"); - brw_batch_flush(brw); - } - - void *map = brw_bo_map(brw, bo, MAP_READ | MAP_RAW); - if (map == NULL) { - DBG("%s: failed to map bo\n", __func__); - return false; - } - - unsigned slice_offset_x, slice_offset_y; - brw_miptree_get_image_offset(irb->mt, irb->mt_level, irb->mt_layer, - &slice_offset_x, &slice_offset_y); - xoffset += slice_offset_x; - yoffset += slice_offset_y; - - dst_pitch = _mesa_image_row_stride(pack, width, format, type); - - /* For a window-system renderbuffer, the buffer is actually flipped - * vertically, so we need to handle that. Since the detiling function - * can only really work in the forwards direction, we have to be a - * little creative. First, we compute the Y-offset of the first row of - * the renderbuffer (in renderbuffer coordinates). We then match that - * with the last row of the client's data. Finally, we give - * tiled_to_linear a negative pitch so that it walks through the - * client's data backwards as it walks through the renderbuffer forwards. - */ - if (ctx->ReadBuffer->FlipY) { - yoffset = rb->Height - yoffset - height; - pixels += (ptrdiff_t) (height - 1) * dst_pitch; - dst_pitch = -dst_pitch; - } - - /* We postponed printing this message until having committed to executing - * the function. - */ - DBG("%s: x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x " - "mesa_format=0x%x tiling=%d " - "pack=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n", - __func__, xoffset, yoffset, width, height, - format, type, rb->Format, irb->mt->surf.tiling, - pack->Alignment, pack->RowLength, pack->SkipPixels, - pack->SkipRows); - - isl_memcpy_tiled_to_linear( - xoffset * cpp, (xoffset + width) * cpp, - yoffset, yoffset + height, - pixels, - map + irb->mt->offset, - dst_pitch, irb->mt->surf.row_pitch_B, - devinfo->has_bit6_swizzle, - irb->mt->surf.tiling, - copy_type - ); - - brw_bo_unmap(bo); - return true; -} - -static bool -brw_readpixels_blorp(struct gl_context *ctx, - unsigned x, unsigned y, - unsigned w, unsigned h, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer; - if (!rb) - return false; - - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - - /* _mesa_get_readpixels_transfer_ops() includes the cases of read - * color clamping along with the ctx->_ImageTransferState.
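The FlipY handling above feeds the detiler a negative destination pitch so that a forward walk over the source fills the destination bottom-up. The same trick in isolation, a minimal sketch with hypothetical names:

#include <stdint.h>
#include <string.h>

/* Write a vertically flipped copy with a forward-walking loop: point the
 * destination at its last row and step by a negative pitch, so source
 * row 0 lands on destination row height-1. */
static void
copy_rows_flipped(char *dst, const char *src,
                  uint32_t width_bytes, uint32_t height, uint32_t src_pitch)
{
   int32_t dst_pitch = -(int32_t)width_bytes;
   char *out = dst + (size_t)(height - 1) * width_bytes;

   for (uint32_t y = 0; y < height; y++) {
      memcpy(out, src + (size_t)y * src_pitch, width_bytes);
      out += dst_pitch;   /* negative: moves one row up */
   }
}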
- */ - if (_mesa_get_readpixels_transfer_ops(ctx, rb->Format, format, - type, GL_FALSE)) - return false; - - GLenum dst_base_format = _mesa_unpack_format_to_base_format(format); - if (_mesa_need_rgb_to_luminance_conversion(rb->_BaseFormat, - dst_base_format)) - return false; - - unsigned swizzle; - if (irb->Base.Base._BaseFormat == GL_RGB) { - swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); - } else { - swizzle = SWIZZLE_XYZW; - } - - return brw_blorp_download_miptree(brw, irb->mt, rb->Format, swizzle, - irb->mt_level, x, y, irb->mt_layer, - w, h, 1, GL_TEXTURE_2D, format, type, - ctx->ReadBuffer->FlipY, pixels, packing); -} - -void -brw_readpixels(struct gl_context *ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *pack, GLvoid *pixels) -{ - bool ok; - - struct brw_context *brw = brw_context(ctx); - bool dirty; - - DBG("%s\n", __func__); - - /* Reading pixels won't dirty the front buffer, so reset the dirty - * flag after calling brw_prepare_render(). - */ - dirty = brw->front_buffer_dirty; - brw_prepare_render(brw); - brw->front_buffer_dirty = dirty; - - if (pack->BufferObj) { - if (brw_readpixels_blorp(ctx, x, y, width, height, - format, type, pixels, pack)) - return; - - perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__); - } - - ok = brw_readpixels_tiled_memcpy(ctx, x, y, width, height, - format, type, pixels, pack); - if (ok) - return; - - /* Update Mesa state before calling _mesa_readpixels(). - * XXX this may not be needed since ReadPixels no longer uses the - * span code. - */ - - if (ctx->NewState) - _mesa_update_state(ctx); - - _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels); - - /* There's a brw_prepare_render() call in intelSpanRenderStart(). */ - brw->front_buffer_dirty = dirty; -} diff --git a/src/mesa/drivers/dri/i965/brw_primitive_restart.c b/src/mesa/drivers/dri/i965/brw_primitive_restart.c deleted file mode 100644 index 21f1c79..0000000 --- a/src/mesa/drivers/dri/i965/brw_primitive_restart.c +++ /dev/null @@ -1,462 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE.
- * - * Authors: - * Jordan Justen - * - */ - -#include "main/bufferobj.h" -#include "main/varray.h" -#include "vbo/vbo.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_draw.h" - -#include "brw_batch.h" - - -#define UPDATE_MIN2(a, b) (a) = MIN2((a), (b)) -#define UPDATE_MAX2(a, b) (a) = MAX2((a), (b)) - -/* - * Notes on primitive restart: - * The code below is used when the driver does not fully support primitive - * restart (for example, if it only does restart index of ~0). - * - * We map the index buffer, find the restart indexes, unmap - * the index buffer then draw the sub-primitives delineated by the restarts. - * - * A couple of possible optimizations: - * 1. Save the list of sub-primitive (start, count) values in a list attached - * to the index buffer for re-use in subsequent draws. The list would be - * invalidated when the contents of the buffer changed. - * 2. If drawing triangle strips or quad strips, create a new index buffer - * that uses duplicated vertices to render the disjoint strips as one - * long strip. We'd have to be careful to avoid using too much memory - * for this. - * - * Finally, some apps might perform better if they don't use primitive restart - * at all rather than this fallback path. Set MESA_EXTENSION_OVERRIDE to - * "-GL_NV_primitive_restart" to test that. - */ - - -struct sub_primitive -{ - GLuint start; - GLuint count; - GLuint min_index; - GLuint max_index; -}; - - -/** - * Scan the elements array to find restart indexes. Return an array - * of struct sub_primitive indicating how to draw the sub-primitives - * delineated by the restart index. - */ -static struct sub_primitive * -find_sub_primitives(const void *elements, unsigned element_size, - unsigned start, unsigned end, unsigned restart_index, - unsigned *num_sub_prims) -{ - const unsigned max_prims = end - start; - struct sub_primitive *sub_prims; - unsigned i, cur_start, cur_count; - GLuint scan_index; - unsigned scan_num; - - sub_prims = - malloc(max_prims * sizeof(struct sub_primitive)); - - if (!sub_prims) { - *num_sub_prims = 0; - return NULL; - } - - cur_start = start; - cur_count = 0; - scan_num = 0; - -#define IB_INDEX_READ(TYPE, INDEX) (((const GL##TYPE *) elements)[INDEX]) - -#define SCAN_ELEMENTS(TYPE) \ - sub_prims[scan_num].min_index = (GL##TYPE) 0xffffffff; \ - sub_prims[scan_num].max_index = 0; \ - for (i = start; i < end; i++) { \ - scan_index = IB_INDEX_READ(TYPE, i); \ - if (scan_index == restart_index) { \ - if (cur_count > 0) { \ - assert(scan_num < max_prims); \ - sub_prims[scan_num].start = cur_start; \ - sub_prims[scan_num].count = cur_count; \ - scan_num++; \ - sub_prims[scan_num].min_index = (GL##TYPE) 0xffffffff; \ - sub_prims[scan_num].max_index = 0; \ - } \ - cur_start = i + 1; \ - cur_count = 0; \ - } \ - else { \ - UPDATE_MIN2(sub_prims[scan_num].min_index, scan_index); \ - UPDATE_MAX2(sub_prims[scan_num].max_index, scan_index); \ - cur_count++; \ - } \ - } \ - if (cur_count > 0) { \ - assert(scan_num < max_prims); \ - sub_prims[scan_num].start = cur_start; \ - sub_prims[scan_num].count = cur_count; \ - scan_num++; \ - } - - switch (element_size) { - case 1: - SCAN_ELEMENTS(ubyte); - break; - case 2: - SCAN_ELEMENTS(ushort); - break; - case 4: - SCAN_ELEMENTS(uint); - break; - default: - assert(0 && "bad index_size in find_sub_primitives()"); - } - -#undef SCAN_ELEMENTS - - *num_sub_prims = scan_num; - - return sub_prims; -} - - -/** - * Handle primitive restart in software.
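find_sub_primitives() above is a type-generic scan. Its core idea, shown here for 32-bit indices only and with illustrative names, is a single pass that closes the current run at each restart index:

#include <stdbool.h>
#include <stdint.h>

struct sub_range { unsigned start, count; };

/* Split an index stream on a restart value. Returns how many sub-ranges
 * were written to out[] (at most max_out). A simplified, uint32-only
 * version of the scan performed above. */
static unsigned
split_on_restart(const uint32_t *indices, unsigned count,
                 uint32_t restart_index,
                 struct sub_range *out, unsigned max_out)
{
   unsigned n = 0, run_start = 0;

   for (unsigned i = 0; i <= count; i++) {
      /* The i == count case flushes the final run. */
      bool end_of_run = (i == count) || (indices[i] == restart_index);
      if (end_of_run) {
         if (i > run_start && n < max_out) {
            out[n].start = run_start;
            out[n].count = i - run_start;
            n++;
         }
         run_start = i + 1;
      }
   }
   return n;
}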
- * - * This function breaks up calls into the driver so primitive restart - * support is not required in the driver. - */ -static void -vbo_sw_primitive_restart_common_start(struct gl_context *ctx, - const struct _mesa_prim *prims, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLuint num_instances, - GLuint base_instance, - struct gl_buffer_object *indirect, - GLsizeiptr indirect_offset, - bool primitive_restart, - unsigned restart_index) -{ - GLuint prim_num; - struct _mesa_prim new_prim; - struct _mesa_index_buffer new_ib; - struct sub_primitive *sub_prims; - struct sub_primitive *sub_prim; - GLuint num_sub_prims; - GLuint sub_prim_num; - GLuint end_index; - GLuint sub_end_index; - struct _mesa_prim temp_prim; - GLboolean map_ib = ib->obj && !ib->obj->Mappings[MAP_INTERNAL].Pointer; - const void *ptr; - - /* If there is an indirect buffer, map it and extract the draw params */ - if (indirect) { - const uint32_t *indirect_params; - if (!ctx->Driver.MapBufferRange(ctx, 0, indirect->Size, GL_MAP_READ_BIT, - indirect, MAP_INTERNAL)) { - - /* something went wrong with mapping, give up */ - _mesa_error(ctx, GL_OUT_OF_MEMORY, - "failed to map indirect buffer for sw primitive restart"); - return; - } - - assert(nr_prims == 1); - new_prim = prims[0]; - indirect_params = (const uint32_t *) - ADD_POINTERS(indirect->Mappings[MAP_INTERNAL].Pointer, - indirect_offset); - - new_prim.count = indirect_params[0]; - new_prim.start = indirect_params[2]; - new_prim.basevertex = indirect_params[3]; - - num_instances = indirect_params[1]; - base_instance = indirect_params[4]; - - new_ib = *ib; - new_ib.count = new_prim.count; - - prims = &new_prim; - ib = &new_ib; - - ctx->Driver.UnmapBuffer(ctx, indirect, MAP_INTERNAL); - } - - /* Find the sub-primitives. These are regions in the index buffer which - * are split based on the primitive restart index value. - */ - if (map_ib) { - ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT, - ib->obj, MAP_INTERNAL); - } - - if (ib->obj) - ptr = ADD_POINTERS(ib->obj->Mappings[MAP_INTERNAL].Pointer, ib->ptr); - else - ptr = ib->ptr; - - sub_prims = find_sub_primitives(ptr, 1 << ib->index_size_shift, - prims[0].start, prims[0].start + ib->count, - restart_index, &num_sub_prims); - - if (map_ib) { - ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL); - } - - /* Loop over the primitives, and use the located sub-primitives to draw - * each primitive with a break to implement each primitive restart. - */ - for (prim_num = 0; prim_num < nr_prims; prim_num++) { - end_index = prims[prim_num].start + prims[prim_num].count; - memcpy(&temp_prim, &prims[prim_num], sizeof (temp_prim)); - /* Loop over the sub-primitives drawing sub-ranges of the primitive. 
*/ - for (sub_prim_num = 0; sub_prim_num < num_sub_prims; sub_prim_num++) { - sub_prim = &sub_prims[sub_prim_num]; - sub_end_index = sub_prim->start + sub_prim->count; - if (prims[prim_num].start <= sub_prim->start) { - temp_prim.start = MAX2(prims[prim_num].start, sub_prim->start); - temp_prim.count = MIN2(sub_end_index, end_index) - temp_prim.start; - if ((temp_prim.start == sub_prim->start) && - (temp_prim.count == sub_prim->count)) { - ctx->Driver.Draw(ctx, &temp_prim, 1, ib, true, false, 0, - sub_prim->min_index, sub_prim->max_index, - num_instances, base_instance); - } else { - ctx->Driver.Draw(ctx, &temp_prim, 1, ib, - false, false, 0, -1, -1, - num_instances, base_instance); - } - } - if (sub_end_index >= end_index) { - break; - } - } - } - - free(sub_prims); -} - -static void -vbo_sw_primitive_restart(struct gl_context *ctx, - const struct _mesa_prim *prims, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLuint num_instances, - GLuint base_instance, - struct gl_buffer_object *indirect, - GLsizeiptr indirect_offset, - bool primitive_restart, - unsigned restart_index) -{ - unsigned i; - for (i = 1; i < nr_prims; i++) { - if (prims[i].start != prims[0].start) - break; - } - - vbo_sw_primitive_restart_common_start(ctx, &prims[0], i, ib, - num_instances, base_instance, - indirect, indirect_offset, - primitive_restart, - restart_index); - if (i != nr_prims) { - vbo_sw_primitive_restart(ctx, &prims[i], nr_prims - i, ib, - num_instances, base_instance, - indirect, indirect_offset, - primitive_restart, - restart_index); - } -} - -/** - * Check if the hardware's cut index support can handle the primitive - * restart index value (pre-Haswell only). - */ -static bool -can_cut_index_handle_restart_index(struct gl_context *ctx, - const struct _mesa_index_buffer *ib, - unsigned restart_index) -{ - /* The FixedIndex variant means 0xFF, 0xFFFF, or 0xFFFFFFFF based on - * the index buffer type, which corresponds exactly to the hardware. - */ - if (ctx->Array.PrimitiveRestartFixedIndex) - return true; - - bool cut_index_will_work; - - switch (ib->index_size_shift) { - case 0: - cut_index_will_work = restart_index == 0xff; - break; - case 1: - cut_index_will_work = restart_index == 0xffff; - break; - case 2: - cut_index_will_work = restart_index == 0xffffffff; - break; - default: - unreachable("not reached"); - } - - return cut_index_will_work; -} - -/** - * Check if the hardware's cut index support can handle the primitive - * restart case. - */ -static bool -can_cut_index_handle_prims(struct gl_context *ctx, - const struct _mesa_prim *prim, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - unsigned restart_index) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* Otherwise Haswell can do it all. 
*/ - if (devinfo->verx10 >= 75) - return true; - - if (!can_cut_index_handle_restart_index(ctx, ib, restart_index)) { - /* The primitive restart index can't be handled, so take - * the software path - */ - return false; - } - - for (unsigned i = 0; i < nr_prims; i++) { - switch (prim[i].mode) { - case GL_POINTS: - case GL_LINES: - case GL_LINE_STRIP: - case GL_TRIANGLES: - case GL_TRIANGLE_STRIP: - case GL_LINES_ADJACENCY: - case GL_LINE_STRIP_ADJACENCY: - case GL_TRIANGLES_ADJACENCY: - case GL_TRIANGLE_STRIP_ADJACENCY: - /* Cut index supports these primitive types */ - break; - default: - /* Cut index does not support these primitive types */ - //case GL_LINE_LOOP: - //case GL_TRIANGLE_FAN: - //case GL_QUADS: - //case GL_QUAD_STRIP: - //case GL_POLYGON: - return false; - } - } - - return true; -} - -/** - * Check if primitive restart is enabled, and if so, handle it properly. - * - * In some cases the support will be handled in software. When available - * hardware will handle primitive restart. - */ -GLboolean -brw_handle_primitive_restart(struct gl_context *ctx, - const struct _mesa_prim *prims, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLuint num_instances, GLuint base_instance, - bool primitive_restart, - unsigned restart_index) -{ - struct brw_context *brw = brw_context(ctx); - - /* We only need to handle cases where there is an index buffer. */ - if (ib == NULL) { - return GL_FALSE; - } - - /* If we have set the in_progress flag, then we are in the middle - * of handling the primitive restart draw. - */ - if (brw->prim_restart.in_progress) { - return GL_FALSE; - } - - /* If PrimitiveRestart is not enabled, then we aren't concerned about - * handling this draw. - */ - if (!primitive_restart) { - return GL_FALSE; - } - - /* Signal that we are in the process of handling the - * primitive restart draw - */ - brw->prim_restart.in_progress = true; - - if (can_cut_index_handle_prims(ctx, prims, nr_prims, ib, restart_index)) { - /* Cut index should work for primitive restart, so use it - */ - brw->prim_restart.enable_cut_index = true; - brw->prim_restart.restart_index = restart_index; - brw_draw_prims(ctx, prims, nr_prims, ib, false, primitive_restart, - restart_index, -1, -1, - num_instances, base_instance); - brw->prim_restart.enable_cut_index = false; - } else { - /* Not all the primitive draw modes are supported by the cut index, - * so take the software path - */ - struct gl_buffer_object *indirect_data = brw->draw.draw_indirect_data; - - /* Clear this to make the draw direct. */ - brw->draw.draw_indirect_data = NULL; - - vbo_sw_primitive_restart(ctx, prims, nr_prims, ib, num_instances, - base_instance, indirect_data, - brw->draw.draw_indirect_offset, - primitive_restart, restart_index); - } - - brw->prim_restart.in_progress = false; - - /* The primitive restart draw was completed, so return true. */ - return GL_TRUE; -} diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c deleted file mode 100644 index cbce06c..0000000 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ /dev/null @@ -1,888 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. 
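can_cut_index_handle_restart_index() above accepts only the all-ones value for the bound index type on pre-Haswell parts. That check reduces to a few lines; a sketch with a hypothetical helper name, keeping the index_size_shift convention (log2 of the index size in bytes) used above:

#include <stdbool.h>

/* Pre-Haswell hardware only honors the "all ones" cut index for the
 * index type in use; check that the app's restart index matches. */
static bool
restart_index_is_all_ones(unsigned index_size_shift, unsigned restart_index)
{
   unsigned index_bits = 8u << index_size_shift;   /* 8, 16 or 32 */
   unsigned all_ones = index_bits == 32 ? 0xffffffffu
                                        : (1u << index_bits) - 1;
   return restart_index == all_ones;
}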
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include -#include "main/glspirv.h" -#include "program/prog_parameter.h" -#include "program/prog_print.h" -#include "program/prog_to_nir.h" -#include "program/program.h" -#include "program/programopt.h" -#include "tnl/tnl.h" -#include "util/ralloc.h" -#include "compiler/glsl/ir.h" -#include "compiler/glsl/program.h" -#include "compiler/glsl/gl_nir.h" -#include "compiler/glsl/glsl_to_nir.h" - -#include "brw_program.h" -#include "brw_context.h" -#include "compiler/brw_nir.h" -#include "brw_defines.h" -#include "brw_batch.h" - -#include "brw_cs.h" -#include "brw_gs.h" -#include "brw_vs.h" -#include "brw_wm.h" -#include "brw_state.h" - -#include "main/shaderapi.h" -#include "main/shaderobj.h" - -static bool -brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar) -{ - if (is_scalar) { - nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms, - type_size_scalar_bytes); - return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0); - } else { - nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms, - type_size_vec4_bytes); - return nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0); - } -} - -static struct gl_program *brw_new_program(struct gl_context *ctx, - gl_shader_stage stage, - GLuint id, bool is_arb_asm); - -nir_shader * -brw_create_nir(struct brw_context *brw, - const struct gl_shader_program *shader_prog, - struct gl_program *prog, - gl_shader_stage stage, - bool is_scalar) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - const nir_shader_compiler_options *options = - ctx->Const.ShaderCompilerOptions[stage].NirOptions; - nir_shader *nir; - - /* First, lower the GLSL/Mesa IR or SPIR-V to NIR */ - if (shader_prog) { - if (shader_prog->data->spirv) { - nir = _mesa_spirv_to_nir(ctx, shader_prog, stage, options); - } else { - nir = glsl_to_nir(ctx, shader_prog, stage, options); - - /* Remap the locations to slots so those requiring two slots will - * occupy two locations. 
For instance, if we have in the IR code a - * dvec3 attr0 in location 0 and vec4 attr1 in location 1, in NIR attr0 - * will use locations/slots 0 and 1, and attr1 will use location/slot 2 - */ - if (nir->info.stage == MESA_SHADER_VERTEX) - nir_remap_dual_slot_attributes(nir, &prog->DualSlotInputs); - } - assert (nir); - - nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out, - NULL); - nir_validate_shader(nir, "after glsl_to_nir or spirv_to_nir"); - NIR_PASS_V(nir, nir_lower_io_to_temporaries, - nir_shader_get_entrypoint(nir), true, false); - } else { - nir = prog_to_nir(prog, options); - NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */ - } - nir_validate_shader(nir, "before brw_preprocess_nir"); - - nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - - if (!ctx->SoftFP64 && ((nir->info.bit_sizes_int | nir->info.bit_sizes_float) & 64) && - (options->lower_doubles_options & nir_lower_fp64_full_software)) { - ctx->SoftFP64 = glsl_float64_funcs_to_nir(ctx, options); - } - - brw_preprocess_nir(brw->screen->compiler, nir, ctx->SoftFP64); - - if (stage == MESA_SHADER_TESS_CTRL) { - /* Lower gl_PatchVerticesIn from a sys. value to a uniform on Gfx8+. */ - static const gl_state_index16 tokens[STATE_LENGTH] = - { STATE_TCS_PATCH_VERTICES_IN }; - nir_lower_patch_vertices(nir, 0, devinfo->ver >= 8 ? tokens : NULL); - } - - if (stage == MESA_SHADER_TESS_EVAL) { - /* Lower gl_PatchVerticesIn to a constant if we have a TCS, or - * a uniform if we don't. - */ - struct gl_linked_shader *tcs = - shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]; - uint32_t static_patch_vertices = - tcs ? tcs->Program->nir->info.tess.tcs_vertices_out : 0; - static const gl_state_index16 tokens[STATE_LENGTH] = - { STATE_TES_PATCH_VERTICES_IN }; - nir_lower_patch_vertices(nir, static_patch_vertices, tokens); - } - - if (stage == MESA_SHADER_FRAGMENT) { - static const struct nir_lower_wpos_ytransform_options wpos_options = { - .state_tokens = {STATE_FB_WPOS_Y_TRANSFORM, 0, 0}, - .fs_coord_pixel_center_integer = 1, - .fs_coord_origin_upper_left = 1, - }; - - bool progress = false; - NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options); - if (progress) { - _mesa_add_state_reference(prog->Parameters, - wpos_options.state_tokens); - } - } - - return nir; -} - -static void -shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align) -{ - assert(glsl_type_is_vector_or_scalar(type)); - - uint32_t comp_size = glsl_type_is_boolean(type) - ? 4 : glsl_get_bit_size(type) / 8; - unsigned length = glsl_get_vector_elements(type); - *size = comp_size * length, - *align = comp_size * (length == 3 ? 
4 : length); -} - -void -brw_nir_lower_resources(nir_shader *nir, struct gl_shader_program *shader_prog, - struct gl_program *prog, - const struct intel_device_info *devinfo) -{ - NIR_PASS_V(nir, brw_nir_lower_uniforms, nir->options->lower_to_scalar); - NIR_PASS_V(prog->nir, gl_nir_lower_samplers, shader_prog); - BITSET_COPY(prog->info.textures_used, prog->nir->info.textures_used); - BITSET_COPY(prog->info.textures_used_by_txf, prog->nir->info.textures_used_by_txf); - - NIR_PASS_V(prog->nir, brw_nir_lower_storage_image, devinfo); - - if (prog->nir->info.stage == MESA_SHADER_COMPUTE && - shader_prog->data->spirv) { - NIR_PASS_V(prog->nir, nir_lower_vars_to_explicit_types, - nir_var_mem_shared, shared_type_info); - NIR_PASS_V(prog->nir, nir_lower_explicit_io, - nir_var_mem_shared, nir_address_format_32bit_offset); - } - - NIR_PASS_V(prog->nir, gl_nir_lower_buffers, shader_prog); - /* Do a round of constant folding to clean up address calculations */ - NIR_PASS_V(prog->nir, nir_opt_constant_folding); -} - -void -brw_shader_gather_info(nir_shader *nir, struct gl_program *prog) -{ - nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - - /* Copy the info we just generated back into the gl_program */ - const char *prog_name = prog->info.name; - const char *prog_label = prog->info.label; - prog->info = nir->info; - prog->info.name = prog_name; - prog->info.label = prog_label; -} - -static unsigned -get_new_program_id(struct brw_screen *screen) -{ - return p_atomic_inc_return(&screen->program_id); -} - -static struct gl_program * -brw_new_program(struct gl_context *ctx, - gl_shader_stage stage, - GLuint id, bool is_arb_asm) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_program *prog = rzalloc(NULL, struct brw_program); - - if (prog) { - prog->id = get_new_program_id(brw->screen); - - return _mesa_init_gl_program(&prog->program, stage, id, is_arb_asm); - } - - return NULL; -} - -static void -brw_delete_program(struct gl_context *ctx, struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - - /* Beware! prog's refcount has reached zero, and it's about to be freed. - * - * In brw_upload_pipeline_state(), we compare brw->programs[i] to - * ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the - * pointer has changed. - * - * We cannot leave brw->programs[i] as a dangling pointer to the dead - * program. malloc() may allocate the same memory for a new gl_program, - * causing us to see matching pointers...but totally different programs. - * - * We cannot set brw->programs[i] to NULL, either. If we've deleted the - * active program, Mesa may set ctx->FooProgram._Current to NULL. That - * would cause us to see matching pointers (NULL == NULL), and fail to - * detect that a program has changed since our last draw. - * - * So, set it to a bogus gl_program pointer that will never match, - * causing us to properly reevaluate the state on our next draw. - * - * Getting this wrong causes heisenbugs which are very hard to catch, - * as you need a very specific allocation pattern to hit the problem. 
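The long comment above describes a sentinel-pointer pattern for safely forgetting a freed program. Reduced to its essentials, with illustrative names rather than the driver's:

#include <stddef.h>

struct program { int id; };

#define NUM_STAGES 6

/* A static sentinel that can never alias a live allocation or NULL.
 * Stale slots keep pointing at it until the next state upload, which
 * then sees "pointer changed" and re-evaluates, never a false match. */
static const struct program deleted_program;

static void
forget_program(struct program *bound[NUM_STAGES], struct program *dying)
{
   for (int i = 0; i < NUM_STAGES; i++) {
      if (bound[i] == dying)
         bound[i] = (struct program *) &deleted_program;
   }
}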
- */ - static const struct gl_program deleted_program; - - for (int i = 0; i < MESA_SHADER_STAGES; i++) { - if (brw->programs[i] == prog) - brw->programs[i] = (struct gl_program *) &deleted_program; - } - - _mesa_delete_program( ctx, prog ); -} - - -static GLboolean -brw_program_string_notify(struct gl_context *ctx, - GLenum target, - struct gl_program *prog) -{ - assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant); - - struct brw_context *brw = brw_context(ctx); - const struct brw_compiler *compiler = brw->screen->compiler; - - switch (target) { - case GL_FRAGMENT_PROGRAM_ARB: { - struct brw_program *newFP = brw_program(prog); - const struct brw_program *curFP = - brw_program_const(brw->programs[MESA_SHADER_FRAGMENT]); - - if (newFP == curFP) - brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM; - _mesa_program_fragment_position_to_sysval(&newFP->program); - newFP->id = get_new_program_id(brw->screen); - - prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true); - - brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo); - - brw_shader_gather_info(prog->nir, prog); - - brw_fs_precompile(ctx, prog); - break; - } - case GL_VERTEX_PROGRAM_ARB: { - struct brw_program *newVP = brw_program(prog); - const struct brw_program *curVP = - brw_program_const(brw->programs[MESA_SHADER_VERTEX]); - - if (newVP == curVP) - brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM; - if (newVP->program.arb.IsPositionInvariant) { - _mesa_insert_mvp_code(ctx, &newVP->program); - } - newVP->id = get_new_program_id(brw->screen); - - /* Also tell tnl about it: - */ - _tnl_program_string(ctx, target, prog); - - prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX, - compiler->scalar_stage[MESA_SHADER_VERTEX]); - - brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo); - - brw_shader_gather_info(prog->nir, prog); - - brw_vs_precompile(ctx, prog); - break; - } - default: - /* - * driver->ProgramStringNotify is only called for ARB programs, fixed - * function vertex programs, and ir_to_mesa (which isn't used by the - * i965 back-end). Therefore, even after geometry shaders are added, - * this function should only ever be called with a target of - * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB. - */ - unreachable("Unexpected target in brwProgramStringNotify"); - } - - return true; -} - -static void -brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - unsigned bits = PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_CS_STALL; - assert(devinfo->ver >= 7 && devinfo->ver <= 11); - - if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | - GL_ELEMENT_ARRAY_BARRIER_BIT | - GL_COMMAND_BARRIER_BIT)) - bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE; - - if (barriers & GL_UNIFORM_BARRIER_BIT) - bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_CONST_CACHE_INVALIDATE); - - if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT) - bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - - if (barriers & (GL_TEXTURE_UPDATE_BARRIER_BIT | - GL_PIXEL_BUFFER_BARRIER_BIT)) - bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_RENDER_TARGET_FLUSH); - - if (barriers & GL_FRAMEBUFFER_BARRIER_BIT) - bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_RENDER_TARGET_FLUSH); - - /* Typed surface messages are handled by the render cache on IVB, so we - * need to flush it too. 
- */ - if (devinfo->verx10 == 70) - bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH; - - brw_emit_pipe_control_flush(brw, bits); -} - -static void -brw_framebuffer_fetch_barrier(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (!ctx->Extensions.EXT_shader_framebuffer_fetch) { - if (devinfo->ver >= 6) { - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_CS_STALL); - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); - } else { - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_RENDER_TARGET_FLUSH); - } - } -} - -void -brw_get_scratch_bo(struct brw_context *brw, - struct brw_bo **scratch_bo, int size) -{ - struct brw_bo *old_bo = *scratch_bo; - - if (old_bo && old_bo->size < size) { - brw_bo_unreference(old_bo); - old_bo = NULL; - } - - if (!old_bo) { - *scratch_bo = - brw_bo_alloc(brw->bufmgr, "scratch bo", size, BRW_MEMZONE_SCRATCH); - } -} - -/** - * Reserve enough scratch space for the given stage to hold \p per_thread_size - * bytes times the given \p thread_count. - */ -void -brw_alloc_stage_scratch(struct brw_context *brw, - struct brw_stage_state *stage_state, - unsigned per_thread_size) -{ - if (stage_state->per_thread_scratch >= per_thread_size) - return; - - stage_state->per_thread_scratch = per_thread_size; - - if (stage_state->scratch_bo) - brw_bo_unreference(stage_state->scratch_bo); - - const struct intel_device_info *devinfo = &brw->screen->devinfo; - assert(stage_state->stage < ARRAY_SIZE(devinfo->max_scratch_ids)); - unsigned max_ids = devinfo->max_scratch_ids[stage_state->stage]; - stage_state->scratch_bo = - brw_bo_alloc(brw->bufmgr, "shader scratch space", - per_thread_size * max_ids, BRW_MEMZONE_SCRATCH); -} - -void -brw_init_frag_prog_functions(struct dd_function_table *functions) -{ - assert(functions->ProgramStringNotify == _tnl_program_string); - - functions->NewProgram = brw_new_program; - functions->DeleteProgram = brw_delete_program; - functions->ProgramStringNotify = brw_program_string_notify; - - functions->LinkShader = brw_link_shader; - - functions->MemoryBarrier = brw_memory_barrier; - functions->FramebufferFetchBarrier = brw_framebuffer_fetch_barrier; -} - -struct shader_times { - uint64_t time; - uint64_t written; - uint64_t reset; -}; - -void -brw_init_shader_time(struct brw_context *brw) -{ - const int max_entries = 2048; - brw->shader_time.bo = - brw_bo_alloc(brw->bufmgr, "shader time", - max_entries * BRW_SHADER_TIME_STRIDE * 3, - BRW_MEMZONE_OTHER); - brw->shader_time.names = rzalloc_array(brw, const char *, max_entries); - brw->shader_time.ids = rzalloc_array(brw, int, max_entries); - brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type, - max_entries); - brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times, - max_entries); - brw->shader_time.max_entries = max_entries; -} - -static int -compare_time(const void *a, const void *b) -{ - uint64_t * const *a_val = a; - uint64_t * const *b_val = b; - - /* We don't just subtract because we're turning the value to an int. 
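brw_alloc_stage_scratch() above grows scratch space monotonically: reallocation happens only when per-thread demand increases, and the buffer is sized as per-thread bytes times the stage's fixed maximum number of scratch IDs. A stripped-down sketch with plain malloc standing in for brw_bo_alloc() (the real code also keeps the old buffer referenced until the GPU is done with it):

#include <stdlib.h>

struct scratch {
   unsigned per_thread_size;
   void *bo;        /* stands in for the driver's buffer object */
   size_t bo_size;
};

/* Grow-only: only reallocate when a shader needs more per-thread
 * space than the current buffer provides. */
static void
ensure_scratch(struct scratch *s, unsigned per_thread_size, unsigned max_ids)
{
   if (s->per_thread_size >= per_thread_size)
      return;                  /* current buffer is already big enough */

   s->per_thread_size = per_thread_size;
   free(s->bo);
   s->bo_size = (size_t)per_thread_size * max_ids;
   s->bo = malloc(s->bo_size);
}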
*/ - if (**a_val < **b_val) - return -1; - else if (**a_val == **b_val) - return 0; - else - return 1; -} - -static void -print_shader_time_line(const char *stage, const char *name, - int shader_num, uint64_t time, uint64_t total) -{ - fprintf(stderr, "%-6s%-18s", stage, name); - - if (shader_num != 0) - fprintf(stderr, "%4d: ", shader_num); - else - fprintf(stderr, " : "); - - fprintf(stderr, "%16lld (%7.2f Gcycles) %4.1f%%\n", - (long long)time, - (double)time / 1000000000.0, - (double)time / total * 100.0); -} - -static void -brw_report_shader_time(struct brw_context *brw) -{ - if (!brw->shader_time.bo || !brw->shader_time.num_entries) - return; - - uint64_t scaled[brw->shader_time.num_entries]; - uint64_t *sorted[brw->shader_time.num_entries]; - uint64_t total_by_type[ST_CS + 1]; - memset(total_by_type, 0, sizeof(total_by_type)); - double total = 0; - for (int i = 0; i < brw->shader_time.num_entries; i++) { - uint64_t written = 0, reset = 0; - enum shader_time_shader_type type = brw->shader_time.types[i]; - - sorted[i] = &scaled[i]; - - switch (type) { - case ST_VS: - case ST_TCS: - case ST_TES: - case ST_GS: - case ST_FS8: - case ST_FS16: - case ST_FS32: - case ST_CS: - written = brw->shader_time.cumulative[i].written; - reset = brw->shader_time.cumulative[i].reset; - break; - - default: - /* I sometimes want to print things that aren't the 3 shader times. - * Just print the sum in that case. - */ - written = 1; - reset = 0; - break; - } - - uint64_t time = brw->shader_time.cumulative[i].time; - if (written) { - scaled[i] = time / written * (written + reset); - } else { - scaled[i] = time; - } - - switch (type) { - case ST_VS: - case ST_TCS: - case ST_TES: - case ST_GS: - case ST_FS8: - case ST_FS16: - case ST_FS32: - case ST_CS: - total_by_type[type] += scaled[i]; - break; - default: - break; - } - - total += scaled[i]; - } - - if (total == 0) { - fprintf(stderr, "No shader time collected yet\n"); - return; - } - - qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time); - - fprintf(stderr, "\n"); - fprintf(stderr, "type ID cycles spent %% of total\n"); - for (int s = 0; s < brw->shader_time.num_entries; s++) { - const char *stage; - /* Work back from the sorted pointers times to a time to print. 
*/ - int i = sorted[s] - scaled; - - if (scaled[i] == 0) - continue; - - int shader_num = brw->shader_time.ids[i]; - const char *shader_name = brw->shader_time.names[i]; - - switch (brw->shader_time.types[i]) { - case ST_VS: - stage = "vs"; - break; - case ST_TCS: - stage = "tcs"; - break; - case ST_TES: - stage = "tes"; - break; - case ST_GS: - stage = "gs"; - break; - case ST_FS8: - stage = "fs8"; - break; - case ST_FS16: - stage = "fs16"; - break; - case ST_FS32: - stage = "fs32"; - break; - case ST_CS: - stage = "cs"; - break; - default: - stage = "other"; - break; - } - - print_shader_time_line(stage, shader_name, shader_num, - scaled[i], total); - } - - fprintf(stderr, "\n"); - print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total); - print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total); - print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total); - print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total); - print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total); - print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total); - print_shader_time_line("total", "fs32", 0, total_by_type[ST_FS32], total); - print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total); -} - -static void -brw_collect_shader_time(struct brw_context *brw) -{ - if (!brw->shader_time.bo) - return; - - /* This probably stalls on the last rendering. We could fix that by - * delaying reading the reports, but it doesn't look like it's a big - * overhead compared to the cost of tracking the time in the first place. - */ - void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE); - - for (int i = 0; i < brw->shader_time.num_entries; i++) { - uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE; - - brw->shader_time.cumulative[i].time += times[BRW_SHADER_TIME_STRIDE * 0 / 4]; - brw->shader_time.cumulative[i].written += times[BRW_SHADER_TIME_STRIDE * 1 / 4]; - brw->shader_time.cumulative[i].reset += times[BRW_SHADER_TIME_STRIDE * 2 / 4]; - } - - /* Zero the BO out to clear it out for our next collection. - */ - memset(bo_map, 0, brw->shader_time.bo->size); - brw_bo_unmap(brw->shader_time.bo); -} - -void -brw_collect_and_report_shader_time(struct brw_context *brw) -{ - brw_collect_shader_time(brw); - - if (brw->shader_time.report_time == 0 || - get_time() - brw->shader_time.report_time >= 1.0) { - brw_report_shader_time(brw); - brw->shader_time.report_time = get_time(); - } -} - -/** - * Chooses an index in the shader_time buffer and sets up tracking information - * for our printouts. - * - * Note that this holds on to references to the underlying programs, which may - * change their lifetimes compared to normal operation. - */ -int -brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog, - enum shader_time_shader_type type, bool is_glsl_sh) -{ - int shader_time_index = brw->shader_time.num_entries++; - assert(shader_time_index < brw->shader_time.max_entries); - brw->shader_time.types[shader_time_index] = type; - - const char *name; - if (prog->Id == 0) { - name = "ff"; - } else if (is_glsl_sh) { - name = prog->info.label ? 
- ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl"; - } else { - name = "prog"; - } - - brw->shader_time.names[shader_time_index] = name; - brw->shader_time.ids[shader_time_index] = prog->Id; - - return shader_time_index; -} - -void -brw_destroy_shader_time(struct brw_context *brw) -{ - brw_bo_unreference(brw->shader_time.bo); - brw->shader_time.bo = NULL; -} - -void -brw_stage_prog_data_free(const void *p) -{ - struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p; - - ralloc_free(prog_data->param); - ralloc_free(prog_data->pull_param); -} - -void -brw_dump_arb_asm(const char *stage, struct gl_program *prog) -{ - fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n", - stage, prog->Id, stage); - _mesa_print_program(prog); -} - -void -brw_setup_tex_for_precompile(const struct intel_device_info *devinfo, - struct brw_sampler_prog_key_data *tex, - const struct gl_program *prog) -{ - const bool has_shader_channel_select = devinfo->verx10 >= 75; - unsigned sampler_count = util_last_bit(prog->SamplersUsed); - for (unsigned i = 0; i < sampler_count; i++) { - if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) { - /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */ - tex->swizzles[i] = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE); - } else { - /* Color sampler: assume no swizzling. */ - tex->swizzles[i] = SWIZZLE_XYZW; - } - } -} - -/** - * Sets up the starting offsets for the groups of binding table entries - * common to all pipeline stages. - * - * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're - * unused but also make sure that addition of small offsets to them will - * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES. - */ -uint32_t -brw_assign_common_binding_table_offsets(const struct intel_device_info *devinfo, - const struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - uint32_t next_binding_table_offset) -{ - int num_textures = util_last_bit(prog->SamplersUsed); - - stage_prog_data->binding_table.texture_start = next_binding_table_offset; - next_binding_table_offset += num_textures; - - if (prog->info.num_ubos) { - assert(prog->info.num_ubos <= BRW_MAX_UBO); - stage_prog_data->binding_table.ubo_start = next_binding_table_offset; - next_binding_table_offset += prog->info.num_ubos; - } else { - stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0; - } - - if (prog->info.num_ssbos || prog->info.num_abos) { - assert(prog->info.num_abos <= BRW_MAX_ABO); - assert(prog->info.num_ssbos <= BRW_MAX_SSBO); - stage_prog_data->binding_table.ssbo_start = next_binding_table_offset; - next_binding_table_offset += prog->info.num_abos + prog->info.num_ssbos; - } else { - stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0; - } - - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) { - stage_prog_data->binding_table.shader_time_start = next_binding_table_offset; - next_binding_table_offset++; - } else { - stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0; - } - - if (prog->info.uses_texture_gather) { - if (devinfo->ver >= 8) { - stage_prog_data->binding_table.gather_texture_start = - stage_prog_data->binding_table.texture_start; - } else { - stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset; - next_binding_table_offset += num_textures; - } - } else { - stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0; - } - - if (prog->info.num_images) { - stage_prog_data->binding_table.image_start = 
next_binding_table_offset; - next_binding_table_offset += prog->info.num_images; - } else { - stage_prog_data->binding_table.image_start = 0xd0d0d0d0; - } - - /* This may or may not be used depending on how the compile goes. */ - stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset; - next_binding_table_offset++; - - /* Plane 0 is just the regular texture section */ - stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start; - - stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset; - next_binding_table_offset += num_textures; - - stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset; - next_binding_table_offset += num_textures; - - /* Set the binding table size. Some callers may append new entries - * and increase this accordingly. - */ - stage_prog_data->binding_table.size_bytes = next_binding_table_offset * 4; - - assert(next_binding_table_offset <= BRW_MAX_SURFACES); - return next_binding_table_offset; -} - -void -brw_populate_default_key(const struct brw_compiler *compiler, - union brw_any_prog_key *prog_key, - struct gl_shader_program *sh_prog, - struct gl_program *prog) -{ - switch (prog->info.stage) { - case MESA_SHADER_VERTEX: - brw_vs_populate_default_key(compiler, &prog_key->vs, prog); - break; - case MESA_SHADER_TESS_CTRL: - brw_tcs_populate_default_key(compiler, &prog_key->tcs, sh_prog, prog); - break; - case MESA_SHADER_TESS_EVAL: - brw_tes_populate_default_key(compiler, &prog_key->tes, sh_prog, prog); - break; - case MESA_SHADER_GEOMETRY: - brw_gs_populate_default_key(compiler, &prog_key->gs, prog); - break; - case MESA_SHADER_FRAGMENT: - brw_wm_populate_default_key(compiler, &prog_key->wm, prog); - break; - case MESA_SHADER_COMPUTE: - brw_cs_populate_default_key(compiler, &prog_key->cs, prog); - break; - default: - unreachable("Unsupported stage!"); - } -} - -void -brw_debug_recompile(struct brw_context *brw, - gl_shader_stage stage, - unsigned api_id, - struct brw_base_prog_key *key) -{ - const struct brw_compiler *compiler = brw->screen->compiler; - enum brw_cache_id cache_id = brw_stage_cache_id(stage); - - brw_shader_perf_log(compiler, brw, "Recompiling %s shader for program %d\n", - _mesa_shader_stage_to_string(stage), api_id); - - const void *old_key = - brw_find_previous_compile(&brw->cache, cache_id, key->program_string_id); - - brw_debug_key_recompile(compiler, brw, stage, old_key, key); -} diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h deleted file mode 100644 index 965ec1a..0000000 --- a/src/mesa/drivers/dri/i965/brw_program.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
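brw_assign_common_binding_table_offsets() above hands out consecutive index ranges and poisons the start of any unused group with 0xd0d0d0d0, so that adding a small index to an absent group's start blows past the surface limit and trips an assert. The scheme, reduced to three groups with illustrative names:

#include <stdint.h>

#define UNUSED_GROUP 0xd0d0d0d0   /* poison value for absent groups */

struct binding_table {
   uint32_t texture_start;
   uint32_t ubo_start;
   uint32_t ssbo_start;
};

/* Assign consecutive binding-table ranges; returns the total number of
 * entries used, which callers may extend further. */
static uint32_t
assign_offsets(struct binding_table *bt, unsigned num_textures,
               unsigned num_ubos, unsigned num_ssbos)
{
   uint32_t next = 0;

   bt->texture_start = next;
   next += num_textures;

   bt->ubo_start = num_ubos ? next : UNUSED_GROUP;
   next += num_ubos;

   bt->ssbo_start = num_ssbos ? next : UNUSED_GROUP;
   next += num_ssbos;

   return next;
}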
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef BRW_PROGRAM_H -#define BRW_PROGRAM_H - -#include "compiler/brw_compiler.h" -#include "nir.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct brw_context; -struct blob; -struct blob_reader; - -enum brw_param_domain { - BRW_PARAM_DOMAIN_BUILTIN = 0, - BRW_PARAM_DOMAIN_PARAMETER, - BRW_PARAM_DOMAIN_UNIFORM, - BRW_PARAM_DOMAIN_IMAGE, -}; - -#define BRW_PARAM(domain, val) (BRW_PARAM_DOMAIN_##domain << 24 | (val)) -#define BRW_PARAM_DOMAIN(param) ((uint32_t)(param) >> 24) -#define BRW_PARAM_VALUE(param) ((uint32_t)(param) & 0x00ffffff) - -#define BRW_PARAM_PARAMETER(idx, comp) \ - BRW_PARAM(PARAMETER, ((idx) << 2) | (comp)) -#define BRW_PARAM_PARAMETER_IDX(param) (BRW_PARAM_VALUE(param) >> 2) -#define BRW_PARAM_PARAMETER_COMP(param) (BRW_PARAM_VALUE(param) & 0x3) - -#define BRW_PARAM_UNIFORM(idx) BRW_PARAM(UNIFORM, (idx)) -#define BRW_PARAM_UNIFORM_IDX(param) BRW_PARAM_VALUE(param) - -#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset)) -#define BRW_PARAM_IMAGE_IDX(value) (BRW_PARAM_VALUE(value) >> 8) -#define BRW_PARAM_IMAGE_OFFSET(value) (BRW_PARAM_VALUE(value) & 0xf) - -struct nir_shader *brw_create_nir(struct brw_context *brw, - const struct gl_shader_program *shader_prog, - struct gl_program *prog, - gl_shader_stage stage, - bool is_scalar); - -void brw_nir_lower_resources(nir_shader *nir, - struct gl_shader_program *shader_prog, - struct gl_program *prog, - const struct intel_device_info *devinfo); - -void brw_shader_gather_info(nir_shader *nir, struct gl_program *prog); - -void brw_setup_tex_for_precompile(const struct intel_device_info *devinfo, - struct brw_sampler_prog_key_data *tex, - const struct gl_program *prog); - -void brw_populate_base_prog_key(struct gl_context *ctx, - const struct brw_program *prog, - struct brw_base_prog_key *key); -void brw_populate_default_base_prog_key(const struct intel_device_info *devinfo, - const struct brw_program *prog, - struct brw_base_prog_key *key); -void brw_debug_recompile(struct brw_context *brw, gl_shader_stage stage, - unsigned api_id, struct brw_base_prog_key *key); - -uint32_t -brw_assign_common_binding_table_offsets(const struct intel_device_info *devinfo, - const struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - uint32_t next_binding_table_offset); - -void -brw_populate_default_key(const struct brw_compiler *compiler, - union brw_any_prog_key *prog_key, - struct gl_shader_program *sh_prog, - struct gl_program *prog); - -void -brw_stage_prog_data_free(const void *prog_data); - -void -brw_dump_arb_asm(const char *stage, struct gl_program *prog); - -bool brw_vs_precompile(struct gl_context *ctx, struct gl_program *prog); -bool brw_tcs_precompile(struct gl_context *ctx, - struct gl_shader_program *shader_prog, - struct gl_program *prog); -bool brw_tes_precompile(struct gl_context *ctx, - struct gl_shader_program *shader_prog, - struct gl_program *prog); -bool brw_gs_precompile(struct gl_context *ctx, struct gl_program *prog); -bool brw_fs_precompile(struct gl_context *ctx, struct 
gl_program *prog); -bool brw_cs_precompile(struct gl_context *ctx, struct gl_program *prog); - -GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); - -void brw_upload_tcs_prog(struct brw_context *brw); -void brw_tcs_populate_key(struct brw_context *brw, - struct brw_tcs_prog_key *key); -void brw_tcs_populate_default_key(const struct brw_compiler *compiler, - struct brw_tcs_prog_key *key, - struct gl_shader_program *sh_prog, - struct gl_program *prog); -void brw_upload_tes_prog(struct brw_context *brw); -void brw_tes_populate_key(struct brw_context *brw, - struct brw_tes_prog_key *key); -void brw_tes_populate_default_key(const struct brw_compiler *compiler, - struct brw_tes_prog_key *key, - struct gl_shader_program *sh_prog, - struct gl_program *prog); - -void brw_write_blob_program_data(struct blob *binary, gl_shader_stage stage, - const void *program, - struct brw_stage_prog_data *prog_data); -bool brw_read_blob_program_data(struct blob_reader *binary, - struct gl_program *prog, gl_shader_stage stage, - const uint8_t **program, - struct brw_stage_prog_data *prog_data); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_program_binary.c b/src/mesa/drivers/dri/i965/brw_program_binary.c deleted file mode 100644 index 919ddd3..0000000 --- a/src/mesa/drivers/dri/i965/brw_program_binary.c +++ /dev/null @@ -1,353 +0,0 @@ -/* - * Copyright (c) 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include - -#include "compiler/nir/nir_serialize.h" -#include "util/build_id.h" -#include "util/mesa-sha1.h" - -#include "brw_context.h" -#include "brw_program.h" -#include "brw_state.h" - -static uint8_t driver_sha1[20]; - -void -brw_program_binary_init(unsigned device_id) -{ - const struct build_id_note *note = - build_id_find_nhdr_for_addr(brw_program_binary_init); - assert(note); - - /** - * With Mesa's megadrivers, taking the sha1 of i965_dri.so may not be - * unique. Therefore, we make a sha1 of the "i965" string and the sha1 - * build id from i965_dri.so. 
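
The device-id check and renderer-string construction just below are exact-length arithmetic: "i965_" plus four zero-padded hex digits is nine characters, so snprintf's return value must equal sizeof(renderer) - 1. A minimal standalone sketch, with a hypothetical PCI device id:

#include <assert.h>
#include <stdio.h>

int main(void)
{
   unsigned device_id = 0x0166;   /* hypothetical PCI id, for illustration */
   char renderer[10];             /* "i965_" + 4 hex digits + NUL */

   assert(device_id < 0x10000);   /* guarantees %04x emits exactly 4 digits */
   int len = snprintf(renderer, sizeof(renderer), "i965_%04x", device_id);
   assert(len == sizeof(renderer) - 1);   /* 9 characters written */
   printf("%s\n", renderer);              /* prints "i965_0166" */
   return 0;
}
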
- */ - struct mesa_sha1 ctx; - _mesa_sha1_init(&ctx); - char renderer[10]; - assert(device_id < 0x10000); - int len = snprintf(renderer, sizeof(renderer), "i965_%04x", device_id); - assert(len == sizeof(renderer) - 1); - _mesa_sha1_update(&ctx, renderer, len); - _mesa_sha1_update(&ctx, build_id_data(note), build_id_length(note)); - _mesa_sha1_final(&ctx, driver_sha1); -} - -void -brw_get_program_binary_driver_sha1(struct gl_context *ctx, uint8_t *sha1) -{ - memcpy(sha1, driver_sha1, sizeof(uint8_t) * 20); -} - -enum driver_cache_blob_part { - END_PART, - INTEL_PART, - NIR_PART, -}; - -static bool -blob_parts_valid(void *blob, uint32_t size) -{ - struct blob_reader reader; - blob_reader_init(&reader, blob, size); - - do { - uint32_t part_type = blob_read_uint32(&reader); - if (reader.overrun) - return false; - if (part_type == END_PART) - return reader.current == reader.end; - switch ((enum driver_cache_blob_part)part_type) { - case INTEL_PART: - case NIR_PART: - /* Read the uint32_t part-size and skip over it */ - blob_skip_bytes(&reader, blob_read_uint32(&reader)); - if (reader.overrun) - return false; - break; - default: - return false; - } - } while (true); -} - -static bool -blob_has_part(void *blob, uint32_t size, enum driver_cache_blob_part part) -{ - struct blob_reader reader; - blob_reader_init(&reader, blob, size); - - assert(blob_parts_valid(blob, size)); - do { - uint32_t part_type = blob_read_uint32(&reader); - if (part_type == END_PART) - return false; - if (part_type == part) - return true; - blob_skip_bytes(&reader, blob_read_uint32(&reader)); - } while (true); -} - -static bool -driver_blob_is_ready(void *blob, uint32_t size, bool with_intel_program) -{ - if (!blob) { - return false; - } else if (!blob_parts_valid(blob, size)) { - unreachable("Driver blob format is bad!"); - return false; - } else if (blob_has_part(blob, size, INTEL_PART) == with_intel_program) { - return true; - } else { - return false; - } -} - -static void -serialize_nir_part(struct blob *writer, struct gl_program *prog) -{ - blob_write_uint32(writer, NIR_PART); - intptr_t size_offset = blob_reserve_uint32(writer); - size_t nir_start = writer->size; - nir_serialize(writer, prog->nir, false); - blob_overwrite_uint32(writer, size_offset, writer->size - nir_start); -} - -void -brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog) -{ - if (driver_blob_is_ready(prog->driver_cache_blob, - prog->driver_cache_blob_size, false)) - return; - - if (prog->driver_cache_blob) - ralloc_free(prog->driver_cache_blob); - - struct blob writer; - blob_init(&writer); - serialize_nir_part(&writer, prog); - blob_write_uint32(&writer, END_PART); - prog->driver_cache_blob = ralloc_size(NULL, writer.size); - memcpy(prog->driver_cache_blob, writer.data, writer.size); - prog->driver_cache_blob_size = writer.size; - blob_finish(&writer); -} - -static bool -deserialize_intel_program(struct blob_reader *reader, struct gl_context *ctx, - struct gl_program *prog, gl_shader_stage stage) -{ - struct brw_context *brw = brw_context(ctx); - - union brw_any_prog_key prog_key; - blob_copy_bytes(reader, &prog_key, brw_prog_key_size(stage)); - prog_key.base.program_string_id = brw_program(prog)->id; - - enum brw_cache_id cache_id = brw_stage_cache_id(stage); - - const uint8_t *program; - struct brw_stage_prog_data *prog_data = - ralloc_size(NULL, sizeof(union brw_any_prog_data)); - - if (!brw_read_blob_program_data(reader, prog, stage, &program, prog_data)) { - ralloc_free(prog_data); - return false; - } - - uint32_t offset; 
- void *out_prog_data; - brw_upload_cache(&brw->cache, cache_id, &prog_key, brw_prog_key_size(stage), - program, prog_data->program_size, prog_data, - brw_prog_data_size(stage), &offset, &out_prog_data); - - ralloc_free(prog_data); - - return true; -} - -void -brw_program_deserialize_driver_blob(struct gl_context *ctx, - struct gl_program *prog, - gl_shader_stage stage) -{ - if (!prog->driver_cache_blob) - return; - - struct blob_reader reader; - blob_reader_init(&reader, prog->driver_cache_blob, - prog->driver_cache_blob_size); - - do { - uint32_t part_type = blob_read_uint32(&reader); - if ((enum driver_cache_blob_part)part_type == END_PART) - break; - switch ((enum driver_cache_blob_part)part_type) { - case INTEL_PART: { - ASSERTED uint32_t gen_size = blob_read_uint32(&reader); - assert(!reader.overrun && - (uintptr_t)(reader.end - reader.current) > gen_size); - deserialize_intel_program(&reader, ctx, prog, stage); - break; - } - case NIR_PART: { - ASSERTED uint32_t nir_size = blob_read_uint32(&reader); - assert(!reader.overrun && - (uintptr_t)(reader.end - reader.current) > nir_size); - const struct nir_shader_compiler_options *options = - ctx->Const.ShaderCompilerOptions[stage].NirOptions; - prog->nir = nir_deserialize(NULL, options, &reader); - break; - } - default: - unreachable("Unsupported blob part type!"); - break; - } - } while (true); - - ralloc_free(prog->driver_cache_blob); - prog->driver_cache_blob = NULL; - prog->driver_cache_blob_size = 0; -} - -/* This is just a wrapper around brw_program_deserialize_nir() as i965 - * doesn't need gl_shader_program like other drivers do. - */ -void -brw_deserialize_program_binary(struct gl_context *ctx, - struct gl_shader_program *shProg, - struct gl_program *prog) -{ - brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage); -} - -static void -serialize_intel_part(struct blob *writer, struct gl_context *ctx, - struct gl_shader_program *sh_prog, - struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - - union brw_any_prog_key key; - brw_populate_default_key(brw->screen->compiler, &key, sh_prog, prog); - - const gl_shader_stage stage = prog->info.stage; - uint32_t offset = 0; - void *prog_data = NULL; - if (brw_search_cache(&brw->cache, brw_stage_cache_id(stage), &key, - brw_prog_key_size(stage), &offset, &prog_data, - false)) { - const void *program_map = brw->cache.map + offset; - /* TODO: Improve perf for non-LLC. It would be best to save it at - * program generation time when the program is in normal memory - * accessible with cache to the CPU. Another easier change would be to - * use _mesa_streaming_load_memcpy to read from the program mapped - * memory. - */ - blob_write_uint32(writer, INTEL_PART); - intptr_t size_offset = blob_reserve_uint32(writer); - size_t gen_start = writer->size; - blob_write_bytes(writer, &key, brw_prog_key_size(stage)); - brw_write_blob_program_data(writer, stage, program_map, prog_data); - blob_overwrite_uint32(writer, size_offset, writer->size - gen_start); - } -} - -void -brw_serialize_program_binary(struct gl_context *ctx, - struct gl_shader_program *sh_prog, - struct gl_program *prog) -{ - if (driver_blob_is_ready(prog->driver_cache_blob, - prog->driver_cache_blob_size, true)) - return; - - if (prog->driver_cache_blob) { - if (!prog->nir) { - /* If we loaded from the disk shader cache, then the nir might not - * have been deserialized yet. 
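
For orientation, the driver blob that serialize_nir_part() and serialize_intel_part() build, and that blob_parts_valid() walks, is a simple tagged sequence. A sketch of the layout, reconstructed from the code (sizes in bytes):

/*
 * driver_cache_blob layout:
 *
 *   uint32_t part_type = NIR_PART
 *   uint32_t part_size            <- reserved first, overwritten once known
 *   uint8_t  payload[part_size]   <- serialized NIR
 *   uint32_t part_type = INTEL_PART    (present only once assembly is cached)
 *   uint32_t part_size
 *   uint8_t  payload[part_size]   <- prog key, prog_data, native code
 *   uint32_t part_type = END_PART <- must land exactly at the end of the blob
 */
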
- */ - brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage); - } - ralloc_free(prog->driver_cache_blob); - } - - struct blob writer; - blob_init(&writer); - serialize_nir_part(&writer, prog); - serialize_intel_part(&writer, ctx, sh_prog, prog); - blob_write_uint32(&writer, END_PART); - prog->driver_cache_blob = ralloc_size(NULL, writer.size); - memcpy(prog->driver_cache_blob, writer.data, writer.size); - prog->driver_cache_blob_size = writer.size; - blob_finish(&writer); -} - -void -brw_write_blob_program_data(struct blob *binary, gl_shader_stage stage, - const void *program, - struct brw_stage_prog_data *prog_data) -{ - /* Write prog_data to blob. */ - blob_write_bytes(binary, prog_data, brw_prog_data_size(stage)); - - /* Write program to blob. */ - blob_write_bytes(binary, program, prog_data->program_size); - - /* Write push params */ - blob_write_bytes(binary, prog_data->param, - sizeof(uint32_t) * prog_data->nr_params); - - /* Write pull params */ - blob_write_bytes(binary, prog_data->pull_param, - sizeof(uint32_t) * prog_data->nr_pull_params); -} - -bool -brw_read_blob_program_data(struct blob_reader *binary, struct gl_program *prog, - gl_shader_stage stage, const uint8_t **program, - struct brw_stage_prog_data *prog_data) -{ - /* Read shader prog_data from blob. */ - blob_copy_bytes(binary, prog_data, brw_prog_data_size(stage)); - if (binary->overrun) - return false; - - /* Read shader program from blob. */ - *program = blob_read_bytes(binary, prog_data->program_size); - - /* Read push params */ - prog_data->param = rzalloc_array(NULL, uint32_t, prog_data->nr_params); - blob_copy_bytes(binary, prog_data->param, - sizeof(uint32_t) * prog_data->nr_params); - - /* Read pull params */ - prog_data->pull_param = rzalloc_array(NULL, uint32_t, - prog_data->nr_pull_params); - blob_copy_bytes(binary, prog_data->pull_param, - sizeof(uint32_t) * prog_data->nr_pull_params); - - return !binary->overrun; -} diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c b/src/mesa/drivers/dri/i965/brw_program_cache.c deleted file mode 100644 index df2b92e..0000000 --- a/src/mesa/drivers/dri/i965/brw_program_cache.c +++ /dev/null @@ -1,523 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell
-  */
-
-/** @file brw_program_cache.c
- *
- * This file implements a simple program cache for 965.  The consumers can
- * query the hash table of programs using a cache_id and program key, and
- * receive the corresponding program buffer object (plus associated auxiliary
- * data) in return.  Objects in the cache may not have relocations
- * (pointers to other BOs) in them.
- *
- * The inner workings are a simple hash table based on an XXH32 hash of the
- * key data.
- *
- * Replacement is not implemented.  Instead, when the cache gets too
- * big we throw out all of the cache data and let it get regenerated.
- */
-
-#include "main/streaming-load-memcpy.h"
-#include "x86/common_x86_asm.h"
-#include "brw_batch.h"
-#include "brw_state.h"
-#include "brw_wm.h"
-#include "brw_gs.h"
-#include "brw_cs.h"
-#include "brw_program.h"
-#include "compiler/brw_eu.h"
-#include "util/u_memory.h"
-#define XXH_INLINE_ALL
-#include "util/xxhash.h"
-
-#define FILE_DEBUG_FLAG DEBUG_STATE
-
-struct brw_cache_item {
-   /**
-    * Effectively part of the key, cache_id identifies what kind of state
-    * buffer is involved, and also which dirty flag should be set.
-    */
-   enum brw_cache_id cache_id;
-
-   /** 32-bit hash of the key data */
-   GLuint hash;
-
-   /** for variable-sized keys */
-   GLuint key_size;
-   GLuint prog_data_size;
-   const struct brw_base_prog_key *key;
-
-   uint32_t offset;
-   uint32_t size;
-
-   struct brw_cache_item *next;
-};
-
-enum brw_cache_id
-brw_stage_cache_id(gl_shader_stage stage)
-{
-   static const enum brw_cache_id stage_ids[] = {
-      BRW_CACHE_VS_PROG,
-      BRW_CACHE_TCS_PROG,
-      BRW_CACHE_TES_PROG,
-      BRW_CACHE_GS_PROG,
-      BRW_CACHE_FS_PROG,
-      BRW_CACHE_CS_PROG,
-   };
-   assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_ids));
-   return stage_ids[stage];
-}
-
-static GLuint
-hash_key(struct brw_cache_item *item)
-{
-   uint32_t hash = 0;
-   hash = XXH32(&item->cache_id, sizeof(item->cache_id), hash);
-   hash = XXH32(item->key, item->key_size, hash);
-
-   return hash;
-}
-
-static int
-brw_cache_item_equals(const struct brw_cache_item *a,
-                      const struct brw_cache_item *b)
-{
-   return a->cache_id == b->cache_id &&
-          a->hash == b->hash &&
-          a->key_size == b->key_size &&
-          (memcmp(a->key, b->key, a->key_size) == 0);
-}
-
-static struct brw_cache_item *
-search_cache(struct brw_cache *cache, GLuint hash,
-             struct brw_cache_item *lookup)
-{
-   struct brw_cache_item *c;
-
-#if 0
-   int bucketcount = 0;
-
-   for (c = cache->items[hash % cache->size]; c; c = c->next)
-      bucketcount++;
-
-   fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
-           cache->size, bucketcount, cache->n_items);
-#endif
-
-   for (c = cache->items[hash % cache->size]; c; c = c->next) {
-      if (brw_cache_item_equals(lookup, c))
-         return c;
-   }
-
-   return NULL;
-}
-
-
-static void
-rehash(struct brw_cache *cache)
-{
-   struct brw_cache_item **items;
-   struct brw_cache_item *c, *next;
-   GLuint size, i;
-
-   size = cache->size * 3;
-   items = calloc(size, sizeof(*items));
-
-   for (i = 0; i < cache->size; i++)
-      for (c = cache->items[i]; c; c = next) {
-         next = c->next;
-         c->next = items[c->hash % size];
-         items[c->hash % size] = c;
-      }
-
-   free(cache->items);
-   cache->items = items;
-   cache->size = size;
-}
-
-
-/**
- * Looks up an item matching cache_id and key.  On a hit, returns true and
- * updates the *inout_offset and *inout_prog_data out-parameters.
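
hash_key() above folds the cache_id and the variable-length key into one 32-bit bucket hash by seeding the second XXH32() call with the result of the first. A standalone sketch against the public xxHash API (Mesa inlines its own copy via util/xxhash.h; the function name here is illustrative):

#include <stddef.h>
#include <stdint.h>
#include <xxhash.h>   /* public xxHash API; link with -lxxhash */

static uint32_t
hash_id_plus_key(uint32_t cache_id, const void *key, size_t key_size)
{
   /* Hash the fixed-size id, then use the result to seed the key hash. */
   uint32_t hash = XXH32(&cache_id, sizeof(cache_id), 0);
   return XXH32(key, key_size, hash);
}
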
- */ -bool -brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, - const void *key, GLuint key_size, uint32_t *inout_offset, - void *inout_prog_data, bool flag_state) -{ - struct brw_cache_item *item; - struct brw_cache_item lookup; - GLuint hash; - - lookup.cache_id = cache_id; - lookup.key = key; - lookup.key_size = key_size; - hash = hash_key(&lookup); - lookup.hash = hash; - - item = search_cache(cache, hash, &lookup); - - if (item == NULL) - return false; - - void *prog_data = ((char *) item->key) + item->key_size; - - if (item->offset != *inout_offset || - prog_data != *((void **) inout_prog_data)) { - if (likely(flag_state)) - cache->brw->ctx.NewDriverState |= (1 << cache_id); - *inout_offset = item->offset; - *((void **) inout_prog_data) = prog_data; - } - - return true; -} - -static void -brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size) -{ - struct brw_context *brw = cache->brw; - struct brw_bo *new_bo; - - perf_debug("Copying to larger program cache: %u kB -> %u kB\n", - (unsigned) cache->bo->size / 1024, new_size / 1024); - - new_bo = brw_bo_alloc(brw->bufmgr, "program cache", new_size, - BRW_MEMZONE_SHADER); - if (can_do_exec_capture(brw->screen)) - new_bo->kflags |= EXEC_OBJECT_CAPTURE; - - void *map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE | - MAP_ASYNC | MAP_PERSISTENT); - - /* Copy any existing data that needs to be saved. */ - if (cache->next_offset != 0) { -#ifdef USE_SSE41 - if (!cache->bo->cache_coherent && cpu_has_sse4_1) - _mesa_streaming_load_memcpy(map, cache->map, cache->next_offset); - else -#endif - memcpy(map, cache->map, cache->next_offset); - } - - brw_bo_unmap(cache->bo); - brw_bo_unreference(cache->bo); - cache->bo = new_bo; - cache->map = map; - - /* Since we have a new BO in place, we need to signal the units - * that depend on it (state base address on gfx5+, or unit state before). - */ - brw->ctx.NewDriverState |= BRW_NEW_PROGRAM_CACHE; - brw->batch.state_base_address_emitted = false; -} - -/** - * Attempts to find an item in the cache with identical data. - */ -static const struct brw_cache_item * -brw_lookup_prog(const struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, unsigned data_size) -{ - unsigned i; - const struct brw_cache_item *item; - - for (i = 0; i < cache->size; i++) { - for (item = cache->items[i]; item; item = item->next) { - if (item->cache_id != cache_id || item->size != data_size || - memcmp(cache->map + item->offset, data, item->size) != 0) - continue; - - return item; - } - } - - return NULL; -} - -static uint32_t -brw_alloc_item_data(struct brw_cache *cache, uint32_t size) -{ - uint32_t offset; - - /* Allocate space in the cache BO for our new program. 
*/
-   if (cache->next_offset + size > cache->bo->size) {
-      uint32_t new_size = cache->bo->size * 2;
-
-      while (cache->next_offset + size > new_size)
-         new_size *= 2;
-
-      brw_cache_new_bo(cache, new_size);
-   }
-
-   offset = cache->next_offset;
-
-   /* Programs are always 64-byte aligned, so set up the next one now */
-   cache->next_offset = ALIGN(offset + size, 64);
-
-   return offset;
-}
-
-const void *
-brw_find_previous_compile(struct brw_cache *cache,
-                          enum brw_cache_id cache_id,
-                          unsigned program_string_id)
-{
-   for (unsigned i = 0; i < cache->size; i++) {
-      for (struct brw_cache_item *c = cache->items[i]; c; c = c->next) {
-         if (c->cache_id == cache_id &&
-             c->key->program_string_id == program_string_id) {
-            return c->key;
-         }
-      }
-   }
-
-   return NULL;
-}
-
-void
-brw_upload_cache(struct brw_cache *cache,
-                 enum brw_cache_id cache_id,
-                 const void *key,
-                 GLuint key_size,
-                 const void *data,
-                 GLuint data_size,
-                 const void *prog_data,
-                 GLuint prog_data_size,
-                 uint32_t *out_offset,
-                 void *out_prog_data)
-{
-   struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
-   const struct brw_cache_item *matching_data =
-      brw_lookup_prog(cache, cache_id, data, data_size);
-   GLuint hash;
-   void *tmp;
-
-   item->cache_id = cache_id;
-   item->size = data_size;
-   item->key = key;
-   item->key_size = key_size;
-   item->prog_data_size = prog_data_size;
-   hash = hash_key(item);
-   item->hash = hash;
-
-   /* If we can find a matching prog in the cache already, then reuse the
-    * existing data without creating a new copy in the underlying buffer
-    * object.  This is notably useful for programs generating shaders at
-    * runtime, where multiple shaders may compile to the same thing in our
-    * backend.
-    */
-   if (matching_data) {
-      item->offset = matching_data->offset;
-   } else {
-      item->offset = brw_alloc_item_data(cache, data_size);
-
-      /* Copy data to the buffer */
-      memcpy(cache->map + item->offset, data, data_size);
-   }
-
-   /* Set up the memory containing the key and prog_data */
-   tmp = malloc(key_size + prog_data_size);
-
-   memcpy(tmp, key, key_size);
-   memcpy(tmp + key_size, prog_data, prog_data_size);
-
-   item->key = tmp;
-
-   if (cache->n_items > cache->size * 1.5f)
-      rehash(cache);
-
-   hash %= cache->size;
-   item->next = cache->items[hash];
-   cache->items[hash] = item;
-   cache->n_items++;
-
-   *out_offset = item->offset;
-   *(void **)out_prog_data = (void *)((char *)item->key + item->key_size);
-   cache->brw->ctx.NewDriverState |= 1 << cache_id;
-}
-
-void
-brw_init_caches(struct brw_context *brw)
-{
-   struct brw_cache *cache = &brw->cache;
-
-   cache->brw = brw;
-
-   cache->size = 7;
-   cache->n_items = 0;
-   cache->items =
-      calloc(cache->size, sizeof(struct brw_cache_item *));
-
-   cache->bo = brw_bo_alloc(brw->bufmgr, "program cache", 16384,
-                            BRW_MEMZONE_SHADER);
-   if (can_do_exec_capture(brw->screen))
-      cache->bo->kflags |= EXEC_OBJECT_CAPTURE;
-
-   cache->map = brw_bo_map(brw, cache->bo, MAP_READ | MAP_WRITE |
-                           MAP_ASYNC | MAP_PERSISTENT);
-}
-
-static void
-brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
-{
-   struct brw_cache_item *c, *next;
-   GLuint i;
-
-   DBG("%s\n", __func__);
-
-   for (i = 0; i < cache->size; i++) {
-      for (c = cache->items[i]; c; c = next) {
-         next = c->next;
-         if (c->cache_id == BRW_CACHE_VS_PROG ||
-             c->cache_id == BRW_CACHE_TCS_PROG ||
-             c->cache_id == BRW_CACHE_TES_PROG ||
-             c->cache_id == BRW_CACHE_GS_PROG ||
-             c->cache_id == BRW_CACHE_FS_PROG ||
-             c->cache_id == BRW_CACHE_CS_PROG) {
-            const void *item_prog_data = ((char *)c->key) + c->key_size;
-
brw_stage_prog_data_free(item_prog_data); - } - free((void *)c->key); - free(c); - } - cache->items[i] = NULL; - } - - cache->n_items = 0; - - /* Start putting programs into the start of the BO again, since - * we'll never find the old results. - */ - cache->next_offset = 0; - - /* We need to make sure that the programs get regenerated, since - * any offsets leftover in brw_context will no longer be valid. - */ - brw->NewGLState = ~0; - brw->ctx.NewDriverState = ~0ull; - brw->state.pipelines[BRW_RENDER_PIPELINE].mesa = ~0; - brw->state.pipelines[BRW_RENDER_PIPELINE].brw = ~0ull; - brw->state.pipelines[BRW_COMPUTE_PIPELINE].mesa = ~0; - brw->state.pipelines[BRW_COMPUTE_PIPELINE].brw = ~0ull; - - /* Also, NULL out any stale program pointers. */ - brw->vs.base.prog_data = NULL; - brw->tcs.base.prog_data = NULL; - brw->tes.base.prog_data = NULL; - brw->gs.base.prog_data = NULL; - brw->wm.base.prog_data = NULL; - brw->cs.base.prog_data = NULL; - - brw_batch_flush(brw); -} - -void -brw_program_cache_check_size(struct brw_context *brw) -{ - /* un-tuned guess. Each object is generally a page, so 2000 of them is 8 MB of - * state cache. - */ - if (brw->cache.n_items > 2000) { - perf_debug("Exceeded state cache size limit. Clearing the set " - "of compiled programs, which will trigger recompiles\n"); - brw_clear_cache(brw, &brw->cache); - brw_cache_new_bo(&brw->cache, brw->cache.bo->size); - } -} - - -static void -brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) -{ - - DBG("%s\n", __func__); - - /* This can be NULL if context creation failed early on */ - if (cache->bo) { - brw_bo_unmap(cache->bo); - brw_bo_unreference(cache->bo); - cache->bo = NULL; - cache->map = NULL; - } - brw_clear_cache(brw, cache); - free(cache->items); - cache->items = NULL; - cache->size = 0; -} - - -void -brw_destroy_caches(struct brw_context *brw) -{ - brw_destroy_cache(brw, &brw->cache); -} - -static const char * -cache_name(enum brw_cache_id cache_id) -{ - switch (cache_id) { - case BRW_CACHE_VS_PROG: - return "VS kernel"; - case BRW_CACHE_TCS_PROG: - return "TCS kernel"; - case BRW_CACHE_TES_PROG: - return "TES kernel"; - case BRW_CACHE_FF_GS_PROG: - return "Fixed-function GS kernel"; - case BRW_CACHE_GS_PROG: - return "GS kernel"; - case BRW_CACHE_CLIP_PROG: - return "CLIP kernel"; - case BRW_CACHE_SF_PROG: - return "SF kernel"; - case BRW_CACHE_FS_PROG: - return "FS kernel"; - case BRW_CACHE_CS_PROG: - return "CS kernel"; - default: - return "unknown"; - } -} - -void -brw_print_program_cache(struct brw_context *brw) -{ - const struct brw_cache *cache = &brw->cache; - struct brw_cache_item *item; - - for (unsigned i = 0; i < cache->size; i++) { - for (item = cache->items[i]; item; item = item->next) { - fprintf(stderr, "%s:\n", cache_name(i)); - brw_disassemble_with_labels(&brw->screen->devinfo, cache->map, - item->offset, item->size, stderr); - } - } -} diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c deleted file mode 100644 index efaa8fb..0000000 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ /dev/null @@ -1,621 +0,0 @@ -/* - * Copyright © 2008 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - 
* Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt
- *
- */
-
-/** @file brw_queryobj.c
- *
- * Support for query objects (GL_ARB_occlusion_query, GL_ARB_timer_query,
- * GL_EXT_transform_feedback, and friends).
- *
- * The hardware provides a PIPE_CONTROL command that can report the number of
- * fragments that passed the depth test, or the hardware timer.  They are
- * appropriately synced with the stage of the pipeline for our extensions'
- * needs.
- */
-#include "main/queryobj.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "brw_batch.h"
-
-/* As best we know currently, the Gen HW timestamps are 36 bits across
- * all platforms, which we need to account for when calculating a
- * delta to measure elapsed time.
- *
- * The timestamps read via glGetTimestamp() / brw_get_timestamp() sometimes
- * only have 32 bits due to a kernel bug, and so in that case we make sure to
- * treat all raw timestamps as 32 bits so they overflow consistently and
- * remain comparable.  (Note: the timestamps being passed here are not from
- * the kernel, so we don't need to take the upper 32 bits; in this
- * buggy-kernel case we are just clipping to 32 bits for consistency.)
- */
-uint64_t
-brw_raw_timestamp_delta(struct brw_context *brw, uint64_t time0, uint64_t time1)
-{
-   if (brw->screen->hw_has_timestamp == 2) {
-      /* Kernel clips timestamps to 32 bits in this case, so we also clip
-       * PIPE_CONTROL timestamps for consistency.
-       */
-      return (uint32_t)time1 - (uint32_t)time0;
-   } else {
-      if (time0 > time1) {
-         return (1ULL << 36) + time1 - time0;
-      } else {
-         return time1 - time0;
-      }
-   }
-}
-
-/**
- * Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer.
- */
-void
-brw_write_timestamp(struct brw_context *brw, struct brw_bo *query_bo, int idx)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   if (devinfo->ver == 6) {
-      /* Emit Sandybridge workaround flush: */
-      brw_emit_pipe_control_flush(brw,
-                                  PIPE_CONTROL_CS_STALL |
-                                  PIPE_CONTROL_STALL_AT_SCOREBOARD);
-   }
-
-   uint32_t flags = PIPE_CONTROL_WRITE_TIMESTAMP;
-
-   if (devinfo->ver == 9 && devinfo->gt == 4)
-      flags |= PIPE_CONTROL_CS_STALL;
-
-   brw_emit_pipe_control_write(brw, flags,
-                               query_bo, idx * sizeof(uint64_t), 0);
-}
-
-/**
- * Emit PIPE_CONTROLs to write the PS_DEPTH_COUNT register into a buffer.
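
To make the wraparound branch of brw_raw_timestamp_delta() above concrete, here is the arithmetic for a delta that crosses the 36-bit boundary (a standalone sketch, not driver code):

#include <assert.h>
#include <stdint.h>

int main(void)
{
   /* A raw 36-bit timestamp taken just before the counter wraps... */
   uint64_t time0 = (1ULL << 36) - 100;
   /* ...and one taken just after it wrapped back to zero. */
   uint64_t time1 = 50;

   /* time0 > time1, so add back one full 36-bit period, as the
    * non-buggy-kernel path does. */
   uint64_t delta = (1ULL << 36) + time1 - time0;
   assert(delta == 150);
   return 0;
}
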
- */ -void -brw_write_depth_count(struct brw_context *brw, struct brw_bo *query_bo, int idx) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - uint32_t flags = PIPE_CONTROL_WRITE_DEPTH_COUNT | PIPE_CONTROL_DEPTH_STALL; - - if (devinfo->ver == 9 && devinfo->gt == 4) - flags |= PIPE_CONTROL_CS_STALL; - - if (devinfo->ver >= 10) { - /* "Driver must program PIPE_CONTROL with only Depth Stall Enable bit set - * prior to programming a PIPE_CONTROL with Write PS Depth Count Post sync - * operation." - */ - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL); - } - - brw_emit_pipe_control_write(brw, flags, - query_bo, idx * sizeof(uint64_t), 0); -} - -/** - * Wait on the query object's BO and calculate the final result. - */ -static void -brw_queryobj_get_results(struct gl_context *ctx, - struct brw_query_object *query) -{ - struct brw_context *brw = brw_context(ctx); - UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo; - - int i; - uint64_t *results; - - assert(devinfo->ver < 6); - - if (query->bo == NULL) - return; - - /* If the application has requested the query result, but this batch is - * still contributing to it, flush it now so the results will be present - * when mapped. - */ - if (brw_batch_references(&brw->batch, query->bo)) - brw_batch_flush(brw); - - if (unlikely(brw->perf_debug)) { - if (brw_bo_busy(query->bo)) { - perf_debug("Stalling on the GPU waiting for a query object.\n"); - } - } - - results = brw_bo_map(brw, query->bo, MAP_READ); - switch (query->Base.Target) { - case GL_TIME_ELAPSED_EXT: - /* The query BO contains the starting and ending timestamps. - * Subtract the two and convert to nanoseconds. - */ - query->Base.Result = brw_raw_timestamp_delta(brw, results[0], results[1]); - query->Base.Result = intel_device_info_timebase_scale(devinfo, query->Base.Result); - break; - - case GL_TIMESTAMP: - /* The query BO contains a single timestamp value in results[0]. */ - query->Base.Result = intel_device_info_timebase_scale(devinfo, results[0]); - - /* Ensure the scaled timestamp overflows according to - * GL_QUERY_COUNTER_BITS - */ - query->Base.Result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1; - break; - - case GL_SAMPLES_PASSED_ARB: - /* Loop over pairs of values from the BO, which are the PS_DEPTH_COUNT - * value at the start and end of the batchbuffer. Subtract them to - * get the number of fragments which passed the depth test in each - * individual batch, and add those differences up to get the number - * of fragments for the entire query. - * - * Note that query->Base.Result may already be non-zero. We may have - * run out of space in the query's BO and allocated a new one. If so, - * this function was already called to accumulate the results so far. - */ - for (i = 0; i < query->last_index; i++) { - query->Base.Result += results[i * 2 + 1] - results[i * 2]; - } - break; - - case GL_ANY_SAMPLES_PASSED: - case GL_ANY_SAMPLES_PASSED_CONSERVATIVE: - /* If the starting and ending PS_DEPTH_COUNT from any of the batches - * differ, then some fragments passed the depth test. - */ - for (i = 0; i < query->last_index; i++) { - if (results[i * 2 + 1] != results[i * 2]) { - query->Base.Result = GL_TRUE; - break; - } - } - break; - - default: - unreachable("Unrecognized query target in brw_queryobj_get_results()"); - } - brw_bo_unmap(query->bo); - - /* Now that we've processed the data stored in the query's buffer object, - * we can release it. 
- */
-   brw_bo_unreference(query->bo);
-   query->bo = NULL;
-}
-
-/**
- * The NewQueryObject() driver hook.
- *
- * Allocates and initializes a new query object.
- */
-static struct gl_query_object *
-brw_new_query_object(struct gl_context *ctx, GLuint id)
-{
-   struct brw_query_object *query;
-
-   query = calloc(1, sizeof(struct brw_query_object));
-
-   query->Base.Id = id;
-   query->Base.Result = 0;
-   query->Base.Active = false;
-   query->Base.Ready = true;
-
-   return &query->Base;
-}
-
-/**
- * The DeleteQuery() driver hook.
- */
-static void
-brw_delete_query(struct gl_context *ctx, struct gl_query_object *q)
-{
-   struct brw_query_object *query = (struct brw_query_object *)q;
-
-   brw_bo_unreference(query->bo);
-   _mesa_delete_query(ctx, q);
-}
-
-/**
- * Gfx4-5 driver hook for glBeginQuery().
- *
- * Initializes driver structures and emits any GPU commands required to begin
- * recording data for the query.
- */
-static void
-brw_begin_query(struct gl_context *ctx, struct gl_query_object *q)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_query_object *query = (struct brw_query_object *)q;
-   UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   assert(devinfo->ver < 6);
-
-   switch (query->Base.Target) {
-   case GL_TIME_ELAPSED_EXT:
-      /* For timestamp queries, we record the starting time right away so that
-       * we measure the full time between BeginQuery and EndQuery.  There's
-       * some debate about whether this is the right thing to do.  Our decision
-       * is based on the following text from the ARB_timer_query extension:
-       *
-       * "(5) Should the extension measure total time elapsed between the full
-       *      completion of the BeginQuery and EndQuery commands, or just time
-       *      spent in the graphics library?
-       *
-       *  RESOLVED:  This extension will measure the total time elapsed
-       *  between the full completion of these commands.  Future extensions
-       *  may implement a query to determine time elapsed at different stages
-       *  of the graphics pipeline."
-       *
-       * We write a starting timestamp now (at index 0).  At EndQuery() time,
-       * we'll write a second timestamp (at index 1), and subtract the two to
-       * obtain the time elapsed.  Notably, this includes time elapsed while
-       * the system was doing other work, such as running other applications.
-       */
-      brw_bo_unreference(query->bo);
-      query->bo =
-         brw_bo_alloc(brw->bufmgr, "timer query", 4096, BRW_MEMZONE_OTHER);
-      brw_write_timestamp(brw, query->bo, 0);
-      break;
-
-   case GL_ANY_SAMPLES_PASSED:
-   case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
-   case GL_SAMPLES_PASSED_ARB:
-      /* For occlusion queries, we delay taking an initial sample until the
-       * first drawing occurs in this batch.  See the reasoning in the comments
-       * for brw_emit_query_begin() below.
-       *
-       * Since we're starting a new query, we need to be sure to throw away
-       * any previous occlusion query results.
-       */
-      brw_bo_unreference(query->bo);
-      query->bo = NULL;
-      query->last_index = -1;
-
-      brw->query.obj = query;
-
-      /* Depth statistics on Gfx4 require strange workarounds, so we try to
-       * avoid them when possible.  They're required for occlusion queries,
-       * so turn them on now.
-       */
-      brw->stats_wm++;
-      brw->ctx.NewDriverState |= BRW_NEW_STATS_WM;
-      break;
-
-   default:
-      unreachable("Unrecognized query target in brw_begin_query()");
-   }
-}
-
-/**
- * Gfx4-5 driver hook for glEndQuery().
- *
- * Emits GPU commands to record a final query value, ending any data capturing.
- * However, the final result isn't necessarily available until the GPU processes
- * those commands.  brw_queryobj_get_results() processes the captured data to
- * produce the final result.
- */
-static void
-brw_end_query(struct gl_context *ctx, struct gl_query_object *q)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_query_object *query = (struct brw_query_object *)q;
-   UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   assert(devinfo->ver < 6);
-
-   switch (query->Base.Target) {
-   case GL_TIME_ELAPSED_EXT:
-      /* Write the final timestamp. */
-      brw_write_timestamp(brw, query->bo, 1);
-      break;
-
-   case GL_ANY_SAMPLES_PASSED:
-   case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
-   case GL_SAMPLES_PASSED_ARB:
-
-      /* No query->bo means that EndQuery was called after BeginQuery with no
-       * intervening drawing.  Rather than doing nothing at all here in this
-       * case, we emit the query_begin and query_end state to the
-       * hardware.  This is to guarantee that waiting on the result of this
-       * empty state will cause all previous queries to complete at all, as
-       * required by the OpenGL 4.3 (Core Profile) spec, section 4.2.1:
-       *
-       *    "It must always be true that if any query object returns
-       *     a result available of TRUE, all queries of the same type
-       *     issued prior to that query must also return TRUE."
-       */
-      if (!query->bo) {
-         brw_emit_query_begin(brw);
-      }
-
-      assert(query->bo);
-
-      brw_emit_query_end(brw);
-
-      brw->query.obj = NULL;
-
-      brw->stats_wm--;
-      brw->ctx.NewDriverState |= BRW_NEW_STATS_WM;
-      break;
-
-   default:
-      unreachable("Unrecognized query target in brw_end_query()");
-   }
-}
-
-/**
- * The Gfx4-5 WaitQuery() driver hook.
- *
- * Wait for a query result to become available and return it.  This is the
- * backing for glGetQueryObjectiv() with the GL_QUERY_RESULT pname.
- */
-static void brw_wait_query(struct gl_context *ctx, struct gl_query_object *q)
-{
-   struct brw_query_object *query = (struct brw_query_object *)q;
-   UNUSED const struct intel_device_info *devinfo =
-      &brw_context(ctx)->screen->devinfo;
-
-   assert(devinfo->ver < 6);
-
-   brw_queryobj_get_results(ctx, query);
-   query->Base.Ready = true;
-}
-
-/**
- * The Gfx4-5 CheckQuery() driver hook.
- *
- * Checks whether a query result is ready yet.  If not, flushes.
- * This is the backing for glGetQueryObjectiv()'s QUERY_RESULT_AVAILABLE pname.
- */
-static void brw_check_query(struct gl_context *ctx, struct gl_query_object *q)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_query_object *query = (struct brw_query_object *)q;
-   UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   assert(devinfo->ver < 6);
-
-   /* From the GL_ARB_occlusion_query spec:
-    *
-    *    "Instead of allowing for an infinite loop, performing a
-    *     QUERY_RESULT_AVAILABLE_ARB will perform a flush if the result is
-    *     not ready yet on the first time it is queried.  This ensures that
-    *     the async query will return true in finite time."
-    */
-   if (query->bo && brw_batch_references(&brw->batch, query->bo))
-      brw_batch_flush(brw);
-
-   if (query->bo == NULL || !brw_bo_busy(query->bo)) {
-      brw_queryobj_get_results(ctx, query);
-      query->Base.Ready = true;
-   }
-}
-
-/**
- * Ensure the query's BO has enough space to store a new pair of values.
- *
- * If not, gather the existing BO's results and create a new buffer of the
- * same size.
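
Spelling out the numbers behind the capacity check in ensure_bo_has_space() below, assuming the 4096-byte query BO allocated in brw_begin_query():

/* 4096 bytes / sizeof(uint64_t) = 512 slots of 64-bit counters.
 * Each query index consumes a (begin, end) pair in slots 2*i and
 * 2*i + 1, so one BO holds 256 pairs.  Once last_index * 2 + 1 is
 * >= 512, the next pair would overflow the BO, so the results so
 * far are accumulated and a fresh BO is allocated. */
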
- */ -static void -ensure_bo_has_space(struct gl_context *ctx, struct brw_query_object *query) -{ - struct brw_context *brw = brw_context(ctx); - UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo; - - assert(devinfo->ver < 6); - - if (!query->bo || query->last_index * 2 + 1 >= 4096 / sizeof(uint64_t)) { - - if (query->bo != NULL) { - /* The old query BO did not have enough space, so we allocated a new - * one. Gather the results so far (adding up the differences) and - * release the old BO. - */ - brw_queryobj_get_results(ctx, query); - } - - query->bo = brw_bo_alloc(brw->bufmgr, "query", 4096, BRW_MEMZONE_OTHER); - query->last_index = 0; - } -} - -/** - * Record the PS_DEPTH_COUNT value (for occlusion queries) just before - * primitive drawing. - * - * In a pre-hardware context world, the single PS_DEPTH_COUNT register is - * shared among all applications using the GPU. However, our query value - * needs to only include fragments generated by our application/GL context. - * - * To accommodate this, we record PS_DEPTH_COUNT at the start and end of - * each batchbuffer (technically, the first primitive drawn and flush time). - * Subtracting each pair of values calculates the change in PS_DEPTH_COUNT - * caused by a batchbuffer. Since there is no preemption inside batches, - * this is guaranteed to only measure the effects of our current application. - * - * Adding each of these differences (in case drawing is done over many batches) - * produces the final expected value. - * - * In a world with hardware contexts, PS_DEPTH_COUNT is saved and restored - * as part of the context state, so this is unnecessary, and skipped. - */ -void -brw_emit_query_begin(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_query_object *query = brw->query.obj; - - /* Skip if we're not doing any queries, or we've already recorded the - * initial query value for this batchbuffer. - */ - if (!query || brw->query.begin_emitted) - return; - - ensure_bo_has_space(ctx, query); - - brw_write_depth_count(brw, query->bo, query->last_index * 2); - - brw->query.begin_emitted = true; -} - -/** - * Called at batchbuffer flush to get an ending PS_DEPTH_COUNT - * (for non-hardware context platforms). - * - * See the explanation in brw_emit_query_begin(). - */ -void -brw_emit_query_end(struct brw_context *brw) -{ - struct brw_query_object *query = brw->query.obj; - - if (!brw->query.begin_emitted) - return; - - brw_write_depth_count(brw, query->bo, query->last_index * 2 + 1); - - brw->query.begin_emitted = false; - query->last_index++; -} - -/** - * Driver hook for glQueryCounter(). - * - * This handles GL_TIMESTAMP queries, which perform a pipelined read of the - * current GPU time. This is unlike GL_TIME_ELAPSED, which measures the - * time while the query is active. - */ -void -brw_query_counter(struct gl_context *ctx, struct gl_query_object *q) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_query_object *query = (struct brw_query_object *) q; - - assert(q->Target == GL_TIMESTAMP); - - brw_bo_unreference(query->bo); - query->bo = - brw_bo_alloc(brw->bufmgr, "timestamp query", 4096, BRW_MEMZONE_OTHER); - brw_write_timestamp(brw, query->bo, 0); - - query->flushed = false; -} - -/** - * Read the TIMESTAMP register immediately (in a non-pipelined fashion). - * - * This is used to implement the GetTimestamp() driver hook. 
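
The mask applied in brw_get_timestamp() below (and in brw_queryobj_get_results() above) forces the scaled timestamp to wrap at GL_QUERY_COUNTER_BITS. A small standalone sketch, with 36 assumed as the reported counter width:

#include <assert.h>
#include <stdint.h>

/* Truncate a nanosecond timestamp to a given counter width, mirroring
 * "result &= (1ull << bits) - 1". */
static uint64_t
truncate_timestamp(uint64_t ns, unsigned bits)
{
   return ns & ((1ull << bits) - 1);
}

int main(void)
{
   assert(truncate_timestamp((1ull << 36) + 25, 36) == 25);   /* wraps */
   assert(truncate_timestamp(1000, 36) == 1000);              /* unchanged */
   return 0;
}
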
- */ -static uint64_t -brw_get_timestamp(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - uint64_t result = 0; - - switch (brw->screen->hw_has_timestamp) { - case 3: /* New kernel, always full 36bit accuracy */ - brw_reg_read(brw->bufmgr, TIMESTAMP | 1, &result); - break; - case 2: /* 64bit kernel, result is left-shifted by 32bits, losing 4bits */ - brw_reg_read(brw->bufmgr, TIMESTAMP, &result); - result = result >> 32; - break; - case 1: /* 32bit kernel, result is 36bit wide but may be inaccurate! */ - brw_reg_read(brw->bufmgr, TIMESTAMP, &result); - break; - } - - /* Scale to nanosecond units */ - result = intel_device_info_timebase_scale(devinfo, result); - - /* Ensure the scaled timestamp overflows according to - * GL_QUERY_COUNTER_BITS. Technically this isn't required if - * querying GL_TIMESTAMP via glGetInteger but it seems best to keep - * QueryObject and GetInteger timestamps consistent. - */ - result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1; - return result; -} - -/** - * Is this type of query written by PIPE_CONTROL? - */ -bool -brw_is_query_pipelined(struct brw_query_object *query) -{ - switch (query->Base.Target) { - case GL_TIMESTAMP: - case GL_TIME_ELAPSED: - case GL_ANY_SAMPLES_PASSED: - case GL_ANY_SAMPLES_PASSED_CONSERVATIVE: - case GL_SAMPLES_PASSED_ARB: - return true; - - case GL_PRIMITIVES_GENERATED: - case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: - case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: - case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: - case GL_VERTICES_SUBMITTED_ARB: - case GL_PRIMITIVES_SUBMITTED_ARB: - case GL_VERTEX_SHADER_INVOCATIONS_ARB: - case GL_GEOMETRY_SHADER_INVOCATIONS: - case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: - case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: - case GL_CLIPPING_INPUT_PRIMITIVES_ARB: - case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: - case GL_COMPUTE_SHADER_INVOCATIONS_ARB: - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: - return false; - - default: - unreachable("Unrecognized query target in is_query_pipelined()"); - } -} - -/* Initialize query object functions used on all generations. */ -void brw_init_common_queryobj_functions(struct dd_function_table *functions) -{ - functions->NewQueryObject = brw_new_query_object; - functions->DeleteQuery = brw_delete_query; - functions->GetTimestamp = brw_get_timestamp; -} - -/* Initialize Gfx4/5-specific query object functions. 
*/ -void gfx4_init_queryobj_functions(struct dd_function_table *functions) -{ - functions->BeginQuery = brw_begin_query; - functions->EndQuery = brw_end_query; - functions->CheckQuery = brw_check_query; - functions->WaitQuery = brw_wait_query; - functions->QueryCounter = brw_query_counter; -} diff --git a/src/mesa/drivers/dri/i965/brw_reset.c b/src/mesa/drivers/dri/i965/brw_reset.c deleted file mode 100644 index 9051878..0000000 --- a/src/mesa/drivers/dri/i965/brw_reset.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "main/context.h" - -#include -#include "brw_context.h" - -/** - * Query information about GPU resets observed by this context - * - * Called via \c dd_function_table::GetGraphicsResetStatus. - */ -GLenum -brw_get_graphics_reset_status(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - struct drm_i915_reset_stats stats = { .ctx_id = brw->hw_ctx }; - - /* If hardware contexts are not being used (or - * DRM_IOCTL_I915_GET_RESET_STATS is not supported), this function should - * not be accessible. - */ - assert(brw->hw_ctx != 0); - - /* A reset status other than NO_ERROR was returned last time. I915 returns - * nonzero active/pending only if reset has been encountered and completed. - * Return NO_ERROR from now on. - */ - if (brw->reset_count != 0) - return GL_NO_ERROR; - - if (drmIoctl(brw->screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats) != 0) - return GL_NO_ERROR; - - /* A reset was observed while a batch from this context was executing. - * Assume that this context was at fault. - */ - if (stats.batch_active != 0) { - brw->reset_count = stats.reset_count; - return GL_GUILTY_CONTEXT_RESET_ARB; - } - - /* A reset was observed while a batch from this context was in progress, - * but the batch was not executing. In this case, assume that the context - * was not at fault. 
- */ - if (stats.batch_pending != 0) { - brw->reset_count = stats.reset_count; - return GL_INNOCENT_CONTEXT_RESET_ARB; - } - - return GL_NO_ERROR; -} - -void -brw_check_for_reset(struct brw_context *brw) -{ - struct drm_i915_reset_stats stats = { .ctx_id = brw->hw_ctx }; - - if (drmIoctl(brw->screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats) != 0) - return; - - if (stats.batch_active > 0 || stats.batch_pending > 0) - _mesa_set_context_lost_dispatch(&brw->ctx); -} diff --git a/src/mesa/drivers/dri/i965/brw_screen.c b/src/mesa/drivers/dri/i965/brw_screen.c deleted file mode 100644 index 4d02e73..0000000 --- a/src/mesa/drivers/dri/i965/brw_screen.c +++ /dev/null @@ -1,2886 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "drm-uapi/drm_fourcc.h" -#include -#include -#include -#include "main/context.h" -#include "main/framebuffer.h" -#include "main/renderbuffer.h" -#include "main/texobj.h" -#include "main/hash.h" -#include "main/fbobject.h" -#include "main/version.h" -#include "main/glthread.h" -#include "swrast/s_renderbuffer.h" -#include "util/ralloc.h" -#include "util/disk_cache.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "compiler/nir/nir.h" - -#include "utils.h" -#include "util/disk_cache.h" -#include "util/driconf.h" -#include "util/u_cpu_detect.h" -#include "util/u_memory.h" - -#include "common/intel_defines.h" - -static const driOptionDescription brw_driconf[] = { - DRI_CONF_SECTION_PERFORMANCE - /* Options correspond to DRI_CONF_BO_REUSE_DISABLED, - * DRI_CONF_BO_REUSE_ALL - */ - DRI_CONF_OPT_E(bo_reuse, 1, 0, 1, - "Buffer object reuse", - DRI_CONF_ENUM(0, "Disable buffer object reuse") - DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")) - DRI_CONF_MESA_NO_ERROR(false) - DRI_CONF_MESA_GLTHREAD(false) - DRI_CONF_SECTION_END - - DRI_CONF_SECTION_QUALITY - DRI_CONF_PRECISE_TRIG(false) - - DRI_CONF_OPT_I(clamp_max_samples, -1, 0, 0, - "Clamp the value of GL_MAX_SAMPLES to the " - "given integer. 
If negative, then do not clamp.") - DRI_CONF_SECTION_END - - DRI_CONF_SECTION_DEBUG - DRI_CONF_ALWAYS_FLUSH_BATCH(false) - DRI_CONF_ALWAYS_FLUSH_CACHE(false) - DRI_CONF_DISABLE_THROTTLING(false) - DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN(false) - DRI_CONF_FORCE_GLSL_VERSION(0) - DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS(false) - DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED(false) - DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION(false) - DRI_CONF_ALLOW_EXTRA_PP_TOKENS(false) - DRI_CONF_ALLOW_GLSL_EXTENSION_DIRECTIVE_MIDSHADER(false) - DRI_CONF_ALLOW_GLSL_BUILTIN_VARIABLE_REDECLARATION(false) - DRI_CONF_ALLOW_GLSL_CROSS_STAGE_INTERPOLATION_MISMATCH(false) - DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION(false) - DRI_CONF_FORCE_COMPAT_PROFILE(false) - DRI_CONF_FORCE_GLSL_ABS_SQRT(false) - DRI_CONF_FORCE_GL_VENDOR() - - DRI_CONF_OPT_B(shader_precompile, true, "Perform code generation at shader link time.") - DRI_CONF_SECTION_END - - DRI_CONF_SECTION_MISCELLANEOUS - DRI_CONF_GLSL_ZERO_INIT(false) - DRI_CONF_VS_POSITION_ALWAYS_INVARIANT(false) - DRI_CONF_VS_POSITION_ALWAYS_PRECISE(false) - DRI_CONF_ALLOW_RGB10_CONFIGS(false) - DRI_CONF_ALLOW_RGB565_CONFIGS(true) - DRI_CONF_SECTION_END -}; - -static char * -brw_driconf_get_xml(UNUSED const char *driver_name) -{ - return driGetOptionsXml(brw_driconf, ARRAY_SIZE(brw_driconf)); -} - -static const __DRIconfigOptionsExtension brw_config_options = { - .base = { __DRI_CONFIG_OPTIONS, 2 }, - .xml = NULL, - .getXml = brw_driconf_get_xml, -}; - -#include "brw_batch.h" -#include "brw_buffers.h" -#include "brw_bufmgr.h" -#include "brw_fbo.h" -#include "brw_mipmap_tree.h" -#include "brw_screen.h" -#include "brw_tex.h" -#include "brw_image.h" - -#include "brw_context.h" - -#include "drm-uapi/i915_drm.h" - -/** - * For debugging purposes, this returns a time in seconds. - */ -double -get_time(void) -{ - struct timespec tp; - - clock_gettime(CLOCK_MONOTONIC, &tp); - - return tp.tv_sec + tp.tv_nsec / 1000000000.0; -} - -static const __DRItexBufferExtension brwTexBufferExtension = { - .base = { __DRI_TEX_BUFFER, 3 }, - - .setTexBuffer = brw_set_texbuffer, - .setTexBuffer2 = brw_set_texbuffer2, - .releaseTexBuffer = brw_release_texbuffer, -}; - -static void -brw_dri2_flush_with_flags(__DRIcontext *cPriv, - __DRIdrawable *dPriv, - unsigned flags, - enum __DRI2throttleReason reason) -{ - struct brw_context *brw = cPriv->driverPrivate; - - if (!brw) - return; - - struct gl_context *ctx = &brw->ctx; - - _mesa_glthread_finish(ctx); - - FLUSH_VERTICES(ctx, 0, 0); - - if (flags & __DRI2_FLUSH_DRAWABLE) - brw_resolve_for_dri2_flush(brw, dPriv); - - if (reason == __DRI2_THROTTLE_SWAPBUFFER) - brw->need_swap_throttle = true; - if (reason == __DRI2_THROTTLE_FLUSHFRONT) - brw->need_flush_throttle = true; - - brw_batch_flush(brw); -} - -/** - * Provides compatibility with loaders that only support the older (version - * 1-3) flush interface. - * - * That includes libGL up to Mesa 9.0, and the X Server at least up to 1.13. 
- */ -static void -brw_dri2_flush(__DRIdrawable *drawable) -{ - brw_dri2_flush_with_flags(drawable->driContextPriv, drawable, - __DRI2_FLUSH_DRAWABLE, - __DRI2_THROTTLE_SWAPBUFFER); -} - -static const struct __DRI2flushExtensionRec brwFlushExtension = { - .base = { __DRI2_FLUSH, 4 }, - - .flush = brw_dri2_flush, - .invalidate = dri2InvalidateDrawable, - .flush_with_flags = brw_dri2_flush_with_flags, -}; - -static const struct brw_image_format brw_image_formats[] = { - { DRM_FORMAT_ABGR16161616F, __DRI_IMAGE_COMPONENTS_RGBA, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR16161616F, 8 } } }, - - { DRM_FORMAT_XBGR16161616F, __DRI_IMAGE_COMPONENTS_RGB, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR16161616F, 8 } } }, - - { DRM_FORMAT_ARGB2101010, __DRI_IMAGE_COMPONENTS_RGBA, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB2101010, 4 } } }, - - { DRM_FORMAT_XRGB2101010, __DRI_IMAGE_COMPONENTS_RGB, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB2101010, 4 } } }, - - { DRM_FORMAT_ABGR2101010, __DRI_IMAGE_COMPONENTS_RGBA, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR2101010, 4 } } }, - - { DRM_FORMAT_XBGR2101010, __DRI_IMAGE_COMPONENTS_RGB, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR2101010, 4 } } }, - - { DRM_FORMAT_ARGB8888, __DRI_IMAGE_COMPONENTS_RGBA, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } }, - - { DRM_FORMAT_ABGR8888, __DRI_IMAGE_COMPONENTS_RGBA, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR8888, 4 } } }, - - { __DRI_IMAGE_FOURCC_SARGB8888, __DRI_IMAGE_COMPONENTS_RGBA, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_SARGB8, 4 } } }, - - { __DRI_IMAGE_FOURCC_SXRGB8888, __DRI_IMAGE_COMPONENTS_RGB, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_SXRGB8, 4 } } }, - - { DRM_FORMAT_XRGB8888, __DRI_IMAGE_COMPONENTS_RGB, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB8888, 4 }, } }, - - { DRM_FORMAT_XBGR8888, __DRI_IMAGE_COMPONENTS_RGB, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR8888, 4 }, } }, - - { DRM_FORMAT_ARGB1555, __DRI_IMAGE_COMPONENTS_RGBA, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB1555, 2 } } }, - - { DRM_FORMAT_RGB565, __DRI_IMAGE_COMPONENTS_RGB, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_RGB565, 2 } } }, - - { DRM_FORMAT_R8, __DRI_IMAGE_COMPONENTS_R, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, } }, - - { DRM_FORMAT_R16, __DRI_IMAGE_COMPONENTS_R, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 1 }, } }, - - { DRM_FORMAT_GR88, __DRI_IMAGE_COMPONENTS_RG, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 }, } }, - - { DRM_FORMAT_GR1616, __DRI_IMAGE_COMPONENTS_RG, 1, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR1616, 2 }, } }, - - { DRM_FORMAT_YUV410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 }, - { 2, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 } } }, - - { DRM_FORMAT_YUV411, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 1, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 2, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 } } }, - - { DRM_FORMAT_YUV420, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 1, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 }, - { 2, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 } } }, - - { DRM_FORMAT_YUV422, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 1, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 2, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 } } }, - - { DRM_FORMAT_YUV444, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 1, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 2, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 } } }, - - { DRM_FORMAT_YVU410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, - { 2, 2, 2, 
__DRI_IMAGE_FORMAT_R8, 1 },
-       { 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
-   { DRM_FORMAT_YVU411, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 2, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 1, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
-   { DRM_FORMAT_YVU420, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 2, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 1, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
-   { DRM_FORMAT_YVU422, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 2, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 1, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
-   { DRM_FORMAT_YVU444, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 2, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 1, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
-
-   { DRM_FORMAT_NV12, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } },
-
-   { DRM_FORMAT_P010, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 },
-       { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } },
-
-   { DRM_FORMAT_P012, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 },
-       { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } },
-
-   { DRM_FORMAT_P016, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 },
-       { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } },
-
-   { DRM_FORMAT_NV16, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
-       { 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } },
-
-   { DRM_FORMAT_AYUV, __DRI_IMAGE_COMPONENTS_AYUV, 1,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR8888, 4 } } },
-
-   { DRM_FORMAT_XYUV8888, __DRI_IMAGE_COMPONENTS_XYUV, 1,
-     { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR8888, 4 } } },
-
-   /* For YUYV and UYVY buffers, we set up two overlapping DRI images
-    * and treat them as planar buffers in the compositors.
-    * Plane 0 is GR88 and samples YU or YV pairs and places Y into
-    * the R component, while plane 1 is ARGB/ABGR and samples YUYV/UYVY
-    * clusters, placing U into the G component and V into A. This lets
-    * the texture sampler interpolate the Y components correctly when
-    * sampling from plane 0, and interpolate U and V correctly when
-    * sampling from plane 1.
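-    * As a concrete example, a single 4-byte YUYV cluster (Y0 U Y1 V)
-    * reads through plane 0 as two GR88 texels, (R=Y0, G=U) and
-    * (R=Y1, G=V), and through plane 1 as one ARGB8888 texel with
-    * B=Y0, G=U, R=Y1, A=V.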
*/ - { DRM_FORMAT_YUYV, __DRI_IMAGE_COMPONENTS_Y_XUXV, 2, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 }, - { 0, 1, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } }, - { DRM_FORMAT_UYVY, __DRI_IMAGE_COMPONENTS_Y_UXVX, 2, - { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 }, - { 0, 1, 0, __DRI_IMAGE_FORMAT_ABGR8888, 4 } } } -}; - -static const struct { - uint64_t modifier; - unsigned since_ver; -} supported_modifiers[] = { - { .modifier = DRM_FORMAT_MOD_LINEAR , .since_ver = 1 }, - { .modifier = I915_FORMAT_MOD_X_TILED , .since_ver = 1 }, - { .modifier = I915_FORMAT_MOD_Y_TILED , .since_ver = 6 }, - { .modifier = I915_FORMAT_MOD_Y_TILED_CCS , .since_ver = 9 }, -}; - -static bool -modifier_is_supported(const struct intel_device_info *devinfo, - const struct brw_image_format *fmt, int dri_format, - unsigned use, uint64_t modifier) -{ - const struct isl_drm_modifier_info *modinfo = - isl_drm_modifier_get_info(modifier); - int i; - - /* ISL had better know about the modifier */ - if (!modinfo) - return false; - - if (devinfo->ver < 9 && (use & __DRI_IMAGE_USE_SCANOUT) && - !(modinfo->tiling == ISL_TILING_LINEAR || - modinfo->tiling == ISL_TILING_X)) - return false; - - if (modinfo->aux_usage == ISL_AUX_USAGE_CCS_E) { - /* If INTEL_DEBUG=norbc is set, don't support any CCS_E modifiers */ - if (INTEL_DEBUG(DEBUG_NO_RBC)) - return false; - - /* CCS_E is not supported for planar images */ - if (fmt && fmt->nplanes > 1) - return false; - - if (fmt) { - assert(dri_format == 0); - dri_format = fmt->planes[0].dri_format; - } - - mesa_format format = driImageFormatToGLFormat(dri_format); - /* Whether or not we support compression is based on the RGBA non-sRGB - * version of the format. - */ - format = _mesa_format_fallback_rgbx_to_rgba(format); - format = _mesa_get_srgb_format_linear(format); - if (!isl_format_supports_ccs_e(devinfo, - brw_isl_format_for_mesa_format(format))) - return false; - } - - for (i = 0; i < ARRAY_SIZE(supported_modifiers); i++) { - if (supported_modifiers[i].modifier != modifier) - continue; - - return supported_modifiers[i].since_ver <= devinfo->ver; - } - - return false; -} - -static uint64_t -tiling_to_modifier(uint32_t tiling) -{ - static const uint64_t map[] = { - [I915_TILING_NONE] = DRM_FORMAT_MOD_LINEAR, - [I915_TILING_X] = I915_FORMAT_MOD_X_TILED, - [I915_TILING_Y] = I915_FORMAT_MOD_Y_TILED, - }; - - assert(tiling < ARRAY_SIZE(map)); - - return map[tiling]; -} - -static void -brw_image_warn_if_unaligned(__DRIimage *image, const char *func) -{ - uint32_t tiling, swizzle; - brw_bo_get_tiling(image->bo, &tiling, &swizzle); - - if (tiling != I915_TILING_NONE && (image->offset & 0xfff)) { - _mesa_warning(NULL, "%s: offset 0x%08x not on tile boundary", - func, image->offset); - } -} - -static const struct brw_image_format * -brw_image_format_lookup(int fourcc) -{ - for (unsigned i = 0; i < ARRAY_SIZE(brw_image_formats); i++) { - if (brw_image_formats[i].fourcc == fourcc) - return &brw_image_formats[i]; - } - - return NULL; -} - -static bool -brw_image_get_fourcc(__DRIimage *image, int *fourcc) -{ - if (image->planar_format) { - *fourcc = image->planar_format->fourcc; - return true; - } - - for (unsigned i = 0; i < ARRAY_SIZE(brw_image_formats); i++) { - if (brw_image_formats[i].planes[0].dri_format == image->dri_format) { - *fourcc = brw_image_formats[i].fourcc; - return true; - } - } - return false; -} - -static __DRIimage * -brw_allocate_image(struct brw_screen *screen, int dri_format, - void *loaderPrivate) -{ - __DRIimage *image; - - image = calloc(1, sizeof *image); - if (image == NULL) - 
return NULL; - - image->screen = screen; - image->dri_format = dri_format; - image->offset = 0; - - image->format = driImageFormatToGLFormat(dri_format); - if (dri_format != __DRI_IMAGE_FORMAT_NONE && - image->format == MESA_FORMAT_NONE) { - free(image); - return NULL; - } - - image->internal_format = _mesa_get_format_base_format(image->format); - image->driScrnPriv = screen->driScrnPriv; - image->loader_private = loaderPrivate; - - return image; -} - -/** - * Sets up a DRIImage structure to point to a slice out of a miptree. - */ -static void -brw_setup_image_from_mipmap_tree(struct brw_context *brw, __DRIimage *image, - struct brw_mipmap_tree *mt, GLuint level, - GLuint zoffset) -{ - brw_miptree_make_shareable(brw, mt); - - brw_miptree_check_level_layer(mt, level, zoffset); - - image->width = minify(mt->surf.phys_level0_sa.width, - level - mt->first_level); - image->height = minify(mt->surf.phys_level0_sa.height, - level - mt->first_level); - image->pitch = mt->surf.row_pitch_B; - - image->offset = brw_miptree_get_tile_offsets(mt, level, zoffset, - &image->tile_x, - &image->tile_y); - - brw_bo_unreference(image->bo); - image->bo = mt->bo; - brw_bo_reference(mt->bo); -} - -static __DRIimage * -brw_create_image_from_name(__DRIscreen *dri_screen, - int width, int height, int format, - int name, int pitch, void *loaderPrivate) -{ - struct brw_screen *screen = dri_screen->driverPrivate; - __DRIimage *image; - int cpp; - - image = brw_allocate_image(screen, format, loaderPrivate); - if (image == NULL) - return NULL; - - if (image->format == MESA_FORMAT_NONE) - cpp = 1; - else - cpp = _mesa_get_format_bytes(image->format); - - image->width = width; - image->height = height; - image->pitch = pitch * cpp; - image->bo = brw_bo_gem_create_from_name(screen->bufmgr, "image", - name); - if (!image->bo) { - free(image); - return NULL; - } - image->modifier = tiling_to_modifier(image->bo->tiling_mode); - - return image; -} - -static __DRIimage * -brw_create_image_from_renderbuffer(__DRIcontext *context, - int renderbuffer, void *loaderPrivate) -{ - __DRIimage *image; - struct brw_context *brw = context->driverPrivate; - struct gl_context *ctx = &brw->ctx; - struct gl_renderbuffer *rb; - struct brw_renderbuffer *irb; - - rb = _mesa_lookup_renderbuffer(ctx, renderbuffer); - if (!rb) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glRenderbufferExternalMESA"); - return NULL; - } - - irb = brw_renderbuffer(rb); - brw_miptree_make_shareable(brw, irb->mt); - image = calloc(1, sizeof *image); - if (image == NULL) - return NULL; - - image->internal_format = rb->InternalFormat; - image->format = rb->Format; - image->modifier = tiling_to_modifier( - isl_tiling_to_i915_tiling(irb->mt->surf.tiling)); - image->offset = 0; - image->driScrnPriv = context->driScreenPriv; - image->loader_private = loaderPrivate; - brw_bo_unreference(image->bo); - image->bo = irb->mt->bo; - brw_bo_reference(irb->mt->bo); - image->width = rb->Width; - image->height = rb->Height; - image->pitch = irb->mt->surf.row_pitch_B; - image->dri_format = driGLFormatToImageFormat(image->format); - image->has_depthstencil = irb->mt->stencil_mt? 
true : false; - - rb->NeedsFinishRenderTexture = true; - return image; -} - -static __DRIimage * -brw_create_image_from_texture(__DRIcontext *context, int target, - unsigned texture, int zoffset, - int level, - unsigned *error, - void *loaderPrivate) -{ - __DRIimage *image; - struct brw_context *brw = context->driverPrivate; - struct gl_texture_object *obj; - struct brw_texture_object *iobj; - GLuint face = 0; - - obj = _mesa_lookup_texture(&brw->ctx, texture); - if (!obj || obj->Target != target) { - *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; - return NULL; - } - - if (target == GL_TEXTURE_CUBE_MAP) - face = zoffset; - - _mesa_test_texobj_completeness(&brw->ctx, obj); - iobj = brw_texture_object(obj); - if (!obj->_BaseComplete || (level > 0 && !obj->_MipmapComplete)) { - *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; - return NULL; - } - - if (level < obj->Attrib.BaseLevel || level > obj->_MaxLevel) { - *error = __DRI_IMAGE_ERROR_BAD_MATCH; - return NULL; - } - - if (target == GL_TEXTURE_3D && obj->Image[face][level]->Depth < zoffset) { - *error = __DRI_IMAGE_ERROR_BAD_MATCH; - return NULL; - } - image = calloc(1, sizeof *image); - if (image == NULL) { - *error = __DRI_IMAGE_ERROR_BAD_ALLOC; - return NULL; - } - - image->internal_format = obj->Image[face][level]->InternalFormat; - image->format = obj->Image[face][level]->TexFormat; - image->modifier = tiling_to_modifier( - isl_tiling_to_i915_tiling(iobj->mt->surf.tiling)); - image->driScrnPriv = context->driScreenPriv; - image->loader_private = loaderPrivate; - brw_setup_image_from_mipmap_tree(brw, image, iobj->mt, level, zoffset); - image->dri_format = driGLFormatToImageFormat(image->format); - image->has_depthstencil = iobj->mt->stencil_mt? true : false; - image->planar_format = iobj->planar_format; - if (image->dri_format == __DRI_IMAGE_FORMAT_NONE) { - *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; - free(image); - return NULL; - } - - *error = __DRI_IMAGE_ERROR_SUCCESS; - return image; -} - -static void -brw_destroy_image(__DRIimage *image) -{ - const __DRIscreen * driScreen = image->driScrnPriv; - const __DRIimageLoaderExtension *imgLoader = driScreen->image.loader; - const __DRIdri2LoaderExtension *dri2Loader = driScreen->dri2.loader; - - if (imgLoader && imgLoader->base.version >= 4 && - imgLoader->destroyLoaderImageState) { - imgLoader->destroyLoaderImageState(image->loader_private); - } else if (dri2Loader && dri2Loader->base.version >= 5 && - dri2Loader->destroyLoaderImageState) { - dri2Loader->destroyLoaderImageState(image->loader_private); - } - - brw_bo_unreference(image->bo); - free(image); -} - -enum modifier_priority { - MODIFIER_PRIORITY_INVALID = 0, - MODIFIER_PRIORITY_LINEAR, - MODIFIER_PRIORITY_X, - MODIFIER_PRIORITY_Y, - MODIFIER_PRIORITY_Y_CCS, -}; - -const uint64_t priority_to_modifier[] = { - [MODIFIER_PRIORITY_INVALID] = DRM_FORMAT_MOD_INVALID, - [MODIFIER_PRIORITY_LINEAR] = DRM_FORMAT_MOD_LINEAR, - [MODIFIER_PRIORITY_X] = I915_FORMAT_MOD_X_TILED, - [MODIFIER_PRIORITY_Y] = I915_FORMAT_MOD_Y_TILED, - [MODIFIER_PRIORITY_Y_CCS] = I915_FORMAT_MOD_Y_TILED_CCS, -}; - -static uint64_t -select_best_modifier(struct intel_device_info *devinfo, - int dri_format, - unsigned use, - const uint64_t *modifiers, - const unsigned count) -{ - enum modifier_priority prio = MODIFIER_PRIORITY_INVALID; - - for (int i = 0; i < count; i++) { - if (!modifier_is_supported(devinfo, NULL, dri_format, use, modifiers[i])) - continue; - - switch (modifiers[i]) { - case I915_FORMAT_MOD_Y_TILED_CCS: - prio = MAX2(prio, MODIFIER_PRIORITY_Y_CCS); - break; - 
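-      /* Plain Y-tiling is preferred over X-tiling and linear, but
-       * loses out to CCS-compressed Y-tiling above.
-       */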
case I915_FORMAT_MOD_Y_TILED: - prio = MAX2(prio, MODIFIER_PRIORITY_Y); - break; - case I915_FORMAT_MOD_X_TILED: - prio = MAX2(prio, MODIFIER_PRIORITY_X); - break; - case DRM_FORMAT_MOD_LINEAR: - prio = MAX2(prio, MODIFIER_PRIORITY_LINEAR); - break; - case DRM_FORMAT_MOD_INVALID: - default: - break; - } - } - - return priority_to_modifier[prio]; -} - -static __DRIimage * -brw_create_image_common(__DRIscreen *dri_screen, - int width, int height, int format, - unsigned int use, - const uint64_t *modifiers, - unsigned count, - void *loaderPrivate) -{ - __DRIimage *image; - struct brw_screen *screen = dri_screen->driverPrivate; - uint64_t modifier = DRM_FORMAT_MOD_INVALID; - bool ok; - - if (use & __DRI_IMAGE_USE_CURSOR) { - if (width != 64 || height != 64) - return NULL; - modifier = DRM_FORMAT_MOD_LINEAR; - } - - if (use & __DRI_IMAGE_USE_LINEAR) - modifier = DRM_FORMAT_MOD_LINEAR; - - if (modifier == DRM_FORMAT_MOD_INVALID) { - if (modifiers) { - /* User requested specific modifiers */ - modifier = select_best_modifier(&screen->devinfo, format, use, - modifiers, count); - if (modifier == DRM_FORMAT_MOD_INVALID) - return NULL; - } else { - /* Historically, X-tiled was the default, and so lack of modifier means - * X-tiled. - */ - modifier = I915_FORMAT_MOD_X_TILED; - } - } - - image = brw_allocate_image(screen, format, loaderPrivate); - if (image == NULL) - return NULL; - - const struct isl_drm_modifier_info *mod_info = - isl_drm_modifier_get_info(modifier); - - struct isl_surf surf; - ok = isl_surf_init(&screen->isl_dev, &surf, - .dim = ISL_SURF_DIM_2D, - .format = brw_isl_format_for_mesa_format(image->format), - .width = width, - .height = height, - .depth = 1, - .levels = 1, - .array_len = 1, - .samples = 1, - .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT | - ISL_SURF_USAGE_TEXTURE_BIT | - ISL_SURF_USAGE_STORAGE_BIT | - ((use & __DRI_IMAGE_USE_SCANOUT) ? - ISL_SURF_USAGE_DISPLAY_BIT : 0), - .tiling_flags = (1 << mod_info->tiling)); - assert(ok); - if (!ok) { - free(image); - return NULL; - } - - struct isl_surf aux_surf = {0,}; - if (mod_info->aux_usage == ISL_AUX_USAGE_CCS_E) { - ok = isl_surf_get_ccs_surf(&screen->isl_dev, &surf, NULL, &aux_surf, 0); - if (!ok) { - free(image); - return NULL; - } - } else { - assert(mod_info->aux_usage == ISL_AUX_USAGE_NONE); - aux_surf.size_B = 0; - } - - /* We request that the bufmgr zero the buffer for us for two reasons: - * - * 1) If a buffer gets re-used from the pool, we don't want to leak random - * garbage from our process to some other. - * - * 2) For images with CCS_E, we want to ensure that the CCS starts off in - * a valid state. A CCS value of 0 indicates that the given block is - * in the pass-through state which is what we want. 
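-    * (A block in the pass-through state is read directly from the main
-    * surface, ignoring the CCS, so an all-zero CCS is always consistent
-    * with the freshly zeroed main surface.)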
- */ - image->bo = brw_bo_alloc_tiled(screen->bufmgr, "image", - surf.size_B + aux_surf.size_B, - BRW_MEMZONE_OTHER, - isl_tiling_to_i915_tiling(mod_info->tiling), - surf.row_pitch_B, BO_ALLOC_ZEROED); - if (image->bo == NULL) { - free(image); - return NULL; - } - image->width = width; - image->height = height; - image->pitch = surf.row_pitch_B; - image->modifier = modifier; - - if (aux_surf.size_B) { - image->aux_offset = surf.size_B; - image->aux_pitch = aux_surf.row_pitch_B; - image->aux_size = aux_surf.size_B; - } - - return image; -} - -static __DRIimage * -brw_create_image(__DRIscreen *dri_screen, - int width, int height, int format, - unsigned int use, - void *loaderPrivate) -{ - return brw_create_image_common(dri_screen, width, height, format, use, - NULL, 0, loaderPrivate); -} - -static void * -brw_map_image(__DRIcontext *context, __DRIimage *image, - int x0, int y0, int width, int height, - unsigned int flags, int *stride, void **map_info) -{ - struct brw_context *brw = NULL; - struct brw_bo *bo = NULL; - void *raw_data = NULL; - GLuint pix_w = 1; - GLuint pix_h = 1; - GLint pix_bytes = 1; - - if (!context || !image || !stride || !map_info || *map_info) - return NULL; - - if (x0 < 0 || x0 >= image->width || width > image->width - x0) - return NULL; - - if (y0 < 0 || y0 >= image->height || height > image->height - y0) - return NULL; - - if (flags & MAP_INTERNAL_MASK) - return NULL; - - brw = context->driverPrivate; - bo = image->bo; - - assert(brw); - assert(bo); - - /* DRI flags and GL_MAP.*_BIT flags are the same, so just pass them on. */ - raw_data = brw_bo_map(brw, bo, flags); - if (!raw_data) - return NULL; - - _mesa_get_format_block_size(image->format, &pix_w, &pix_h); - pix_bytes = _mesa_get_format_bytes(image->format); - - assert(pix_w); - assert(pix_h); - assert(pix_bytes > 0); - - raw_data += (x0 / pix_w) * pix_bytes + (y0 / pix_h) * image->pitch; - - brw_bo_reference(bo); - - *stride = image->pitch; - *map_info = bo; - - return raw_data; -} - -static void -brw_unmap_image(UNUSED __DRIcontext *context, UNUSED __DRIimage *image, - void *map_info) -{ - struct brw_bo *bo = map_info; - - brw_bo_unmap(bo); - brw_bo_unreference(bo); -} - -static __DRIimage * -brw_create_image_with_modifiers(__DRIscreen *dri_screen, - int width, int height, int format, - const uint64_t *modifiers, - const unsigned count, - void *loaderPrivate) -{ - return brw_create_image_common(dri_screen, width, height, format, 0, - modifiers, count, loaderPrivate); -} - -static __DRIimage * -brw_create_image_with_modifiers2(__DRIscreen *dri_screen, - int width, int height, int format, - const uint64_t *modifiers, - const unsigned count, unsigned int use, - void *loaderPrivate) -{ - return brw_create_image_common(dri_screen, width, height, format, use, - modifiers, count, loaderPrivate); -} - -static GLboolean -brw_query_image(__DRIimage *image, int attrib, int *value) -{ - switch (attrib) { - case __DRI_IMAGE_ATTRIB_STRIDE: - *value = image->pitch; - return true; - case __DRI_IMAGE_ATTRIB_HANDLE: { - __DRIscreen *dri_screen = image->screen->driScrnPriv; - uint32_t handle; - if (brw_bo_export_gem_handle_for_device(image->bo, - dri_screen->fd, - &handle)) - return false; - *value = handle; - return true; - } - case __DRI_IMAGE_ATTRIB_NAME: - return !brw_bo_flink(image->bo, (uint32_t *) value); - case __DRI_IMAGE_ATTRIB_FORMAT: - *value = image->dri_format; - return true; - case __DRI_IMAGE_ATTRIB_WIDTH: - *value = image->width; - return true; - case __DRI_IMAGE_ATTRIB_HEIGHT: - *value = image->height; - return 
true; - case __DRI_IMAGE_ATTRIB_COMPONENTS: - if (image->planar_format == NULL) - return false; - *value = image->planar_format->components; - return true; - case __DRI_IMAGE_ATTRIB_FD: - return !brw_bo_gem_export_to_prime(image->bo, value); - case __DRI_IMAGE_ATTRIB_FOURCC: - return brw_image_get_fourcc(image, value); - case __DRI_IMAGE_ATTRIB_NUM_PLANES: - if (isl_drm_modifier_has_aux(image->modifier)) { - assert(!image->planar_format || image->planar_format->nplanes == 1); - *value = 2; - } else if (image->planar_format) { - *value = image->planar_format->nplanes; - } else { - *value = 1; - } - return true; - case __DRI_IMAGE_ATTRIB_OFFSET: - *value = image->offset; - return true; - case __DRI_IMAGE_ATTRIB_MODIFIER_LOWER: - *value = (image->modifier & 0xffffffff); - return true; - case __DRI_IMAGE_ATTRIB_MODIFIER_UPPER: - *value = ((image->modifier >> 32) & 0xffffffff); - return true; - - default: - return false; - } -} - -static GLboolean -brw_query_format_modifier_attribs(__DRIscreen *dri_screen, - uint32_t fourcc, uint64_t modifier, - int attrib, uint64_t *value) -{ - struct brw_screen *screen = dri_screen->driverPrivate; - const struct brw_image_format *f = brw_image_format_lookup(fourcc); - - if (!modifier_is_supported(&screen->devinfo, f, 0, 0, modifier)) - return false; - - switch (attrib) { - case __DRI_IMAGE_FORMAT_MODIFIER_ATTRIB_PLANE_COUNT: - *value = isl_drm_modifier_has_aux(modifier) ? 2 : f->nplanes; - return true; - - default: - return false; - } -} - -static __DRIimage * -brw_dup_image(__DRIimage *orig_image, void *loaderPrivate) -{ - __DRIimage *image; - - image = calloc(1, sizeof *image); - if (image == NULL) - return NULL; - - brw_bo_reference(orig_image->bo); - image->screen = orig_image->screen; - image->bo = orig_image->bo; - image->internal_format = orig_image->internal_format; - image->planar_format = orig_image->planar_format; - image->dri_format = orig_image->dri_format; - image->format = orig_image->format; - image->modifier = orig_image->modifier; - image->offset = orig_image->offset; - image->width = orig_image->width; - image->height = orig_image->height; - image->pitch = orig_image->pitch; - image->tile_x = orig_image->tile_x; - image->tile_y = orig_image->tile_y; - image->has_depthstencil = orig_image->has_depthstencil; - image->driScrnPriv = orig_image->driScrnPriv; - image->loader_private = loaderPrivate; - image->aux_offset = orig_image->aux_offset; - image->aux_pitch = orig_image->aux_pitch; - - memcpy(image->strides, orig_image->strides, sizeof(image->strides)); - memcpy(image->offsets, orig_image->offsets, sizeof(image->offsets)); - - return image; -} - -static GLboolean -brw_validate_usage(__DRIimage *image, unsigned int use) -{ - if (use & __DRI_IMAGE_USE_CURSOR) { - if (image->width != 64 || image->height != 64) - return GL_FALSE; - } - - return GL_TRUE; -} - -static __DRIimage * -brw_create_image_from_names(__DRIscreen *dri_screen, - int width, int height, int fourcc, - int *names, int num_names, - int *strides, int *offsets, - void *loaderPrivate) -{ - const struct brw_image_format *f = NULL; - __DRIimage *image; - int i, index; - - if (dri_screen == NULL || names == NULL || num_names != 1) - return NULL; - - f = brw_image_format_lookup(fourcc); - if (f == NULL) - return NULL; - - image = brw_create_image_from_name(dri_screen, width, height, - __DRI_IMAGE_FORMAT_NONE, - names[0], strides[0], - loaderPrivate); - - if (image == NULL) - return NULL; - - image->planar_format = f; - for (i = 0; i < f->nplanes; i++) { - index = 
f->planes[i].buffer_index; - image->offsets[index] = offsets[index]; - image->strides[index] = strides[index]; - } - - return image; -} - -static __DRIimage * -brw_create_image_from_fds_common(__DRIscreen *dri_screen, - int width, int height, int fourcc, - uint64_t modifier, int *fds, int num_fds, - int *strides, int *offsets, - void *loaderPrivate) -{ - struct brw_screen *screen = dri_screen->driverPrivate; - const struct brw_image_format *f; - __DRIimage *image; - int i, index; - bool ok; - - if (fds == NULL || num_fds < 1) - return NULL; - - f = brw_image_format_lookup(fourcc); - if (f == NULL) - return NULL; - - if (modifier != DRM_FORMAT_MOD_INVALID && - !modifier_is_supported(&screen->devinfo, f, 0, 0, modifier)) - return NULL; - - if (f->nplanes == 1) - image = brw_allocate_image(screen, f->planes[0].dri_format, - loaderPrivate); - else - image = brw_allocate_image(screen, __DRI_IMAGE_FORMAT_NONE, - loaderPrivate); - - if (image == NULL) - return NULL; - - image->width = width; - image->height = height; - image->pitch = strides[0]; - - image->planar_format = f; - - if (modifier != DRM_FORMAT_MOD_INVALID) { - const struct isl_drm_modifier_info *mod_info = - isl_drm_modifier_get_info(modifier); - uint32_t tiling = isl_tiling_to_i915_tiling(mod_info->tiling); - image->bo = brw_bo_gem_create_from_prime_tiled(screen->bufmgr, fds[0], - tiling, strides[0]); - } else { - image->bo = brw_bo_gem_create_from_prime(screen->bufmgr, fds[0]); - } - - if (image->bo == NULL) { - free(image); - return NULL; - } - - /* We only support all planes from the same bo. - * brw_bo_gem_create_from_prime() should return the same pointer for all - * fds received here */ - for (i = 1; i < num_fds; i++) { - struct brw_bo *aux = brw_bo_gem_create_from_prime(screen->bufmgr, fds[i]); - brw_bo_unreference(aux); - if (aux != image->bo) { - brw_bo_unreference(image->bo); - free(image); - return NULL; - } - } - - if (modifier != DRM_FORMAT_MOD_INVALID) - image->modifier = modifier; - else - image->modifier = tiling_to_modifier(image->bo->tiling_mode); - - const struct isl_drm_modifier_info *mod_info = - isl_drm_modifier_get_info(image->modifier); - - int size = 0; - struct isl_surf surf; - for (i = 0; i < f->nplanes; i++) { - index = f->planes[i].buffer_index; - image->offsets[index] = offsets[index]; - image->strides[index] = strides[index]; - - mesa_format format = driImageFormatToGLFormat(f->planes[i].dri_format); - /* The images we will create are actually based on the RGBA non-sRGB - * version of the format. - */ - format = _mesa_format_fallback_rgbx_to_rgba(format); - format = _mesa_get_srgb_format_linear(format); - - ok = isl_surf_init(&screen->isl_dev, &surf, - .dim = ISL_SURF_DIM_2D, - .format = brw_isl_format_for_mesa_format(format), - .width = image->width >> f->planes[i].width_shift, - .height = image->height >> f->planes[i].height_shift, - .depth = 1, - .levels = 1, - .array_len = 1, - .samples = 1, - .row_pitch_B = strides[index], - .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT | - ISL_SURF_USAGE_TEXTURE_BIT | - ISL_SURF_USAGE_STORAGE_BIT, - .tiling_flags = (1 << mod_info->tiling)); - if (!ok) { - brw_bo_unreference(image->bo); - free(image); - return NULL; - } - - const int end = offsets[index] + surf.size_B; - if (size < end) - size = end; - } - - if (mod_info->aux_usage == ISL_AUX_USAGE_CCS_E) { - /* Even though we initialize surf in the loop above, we know that - * anything with CCS_E will have exactly one plane so surf is properly - * initialized when we get here. 
- */ - assert(f->nplanes == 1); - - image->aux_offset = offsets[1]; - image->aux_pitch = strides[1]; - - /* Scanout hardware requires that the CCS be placed after the main - * surface in memory. We consider any CCS that is placed any earlier in - * memory to be invalid and reject it. - * - * At some point in the future, this restriction may be relaxed if the - * hardware becomes less strict but we may need a new modifier for that. - */ - assert(size > 0); - if (image->aux_offset < size) { - brw_bo_unreference(image->bo); - free(image); - return NULL; - } - - struct isl_surf aux_surf = {0,}; - ok = isl_surf_get_ccs_surf(&screen->isl_dev, &surf, NULL, &aux_surf, - image->aux_pitch); - if (!ok) { - brw_bo_unreference(image->bo); - free(image); - return NULL; - } - - image->aux_size = aux_surf.size_B; - - const int end = image->aux_offset + aux_surf.size_B; - if (size < end) - size = end; - } else { - assert(mod_info->aux_usage == ISL_AUX_USAGE_NONE); - } - - /* Check that the requested image actually fits within the BO. 'size' - * is already relative to the offsets, so we don't need to add that. */ - if (image->bo->size == 0) { - image->bo->size = size; - } else if (size > image->bo->size) { - brw_bo_unreference(image->bo); - free(image); - return NULL; - } - - if (f->nplanes == 1) { - image->offset = image->offsets[0]; - brw_image_warn_if_unaligned(image, __func__); - } - - return image; -} - -static __DRIimage * -brw_create_image_from_fds(__DRIscreen *dri_screen, - int width, int height, int fourcc, - int *fds, int num_fds, int *strides, int *offsets, - void *loaderPrivate) -{ - return brw_create_image_from_fds_common(dri_screen, width, height, fourcc, - DRM_FORMAT_MOD_INVALID, - fds, num_fds, strides, offsets, - loaderPrivate); -} - -static __DRIimage * -brw_create_image_from_dma_bufs2(__DRIscreen *dri_screen, - int width, int height, - int fourcc, uint64_t modifier, - int *fds, int num_fds, - int *strides, int *offsets, - enum __DRIYUVColorSpace yuv_color_space, - enum __DRISampleRange sample_range, - enum __DRIChromaSiting horizontal_siting, - enum __DRIChromaSiting vertical_siting, - unsigned *error, - void *loaderPrivate) -{ - __DRIimage *image; - const struct brw_image_format *f = brw_image_format_lookup(fourcc); - - if (!f) { - *error = __DRI_IMAGE_ERROR_BAD_MATCH; - return NULL; - } - - image = brw_create_image_from_fds_common(dri_screen, width, height, - fourcc, modifier, - fds, num_fds, strides, offsets, - loaderPrivate); - - /* - * Invalid parameters and any inconsistencies between are assumed to be - * checked by the caller. Therefore besides unsupported formats one can fail - * only in allocation. 
- */
-   if (!image) {
-      *error = __DRI_IMAGE_ERROR_BAD_ALLOC;
-      return NULL;
-   }
-
-   image->yuv_color_space = yuv_color_space;
-   image->sample_range = sample_range;
-   image->horizontal_siting = horizontal_siting;
-   image->vertical_siting = vertical_siting;
-   image->imported_dmabuf = true;
-
-   *error = __DRI_IMAGE_ERROR_SUCCESS;
-   return image;
-}
-
-static __DRIimage *
-brw_create_image_from_dma_bufs(__DRIscreen *dri_screen,
-                               int width, int height, int fourcc,
-                               int *fds, int num_fds,
-                               int *strides, int *offsets,
-                               enum __DRIYUVColorSpace yuv_color_space,
-                               enum __DRISampleRange sample_range,
-                               enum __DRIChromaSiting horizontal_siting,
-                               enum __DRIChromaSiting vertical_siting,
-                               unsigned *error,
-                               void *loaderPrivate)
-{
-   return brw_create_image_from_dma_bufs2(dri_screen, width, height,
-                                          fourcc, DRM_FORMAT_MOD_INVALID,
-                                          fds, num_fds, strides, offsets,
-                                          yuv_color_space,
-                                          sample_range,
-                                          horizontal_siting,
-                                          vertical_siting,
-                                          error,
-                                          loaderPrivate);
-}
-
-static bool
-brw_image_format_is_supported(const struct intel_device_info *devinfo,
-                              const struct brw_image_format *fmt)
-{
-   /* Currently, all formats with a brw_image_format are available on all
-    * platforms so there's really nothing to check there.
-    */
-
-#ifndef NDEBUG
-   if (fmt->nplanes == 1) {
-      mesa_format format = driImageFormatToGLFormat(fmt->planes[0].dri_format);
-      /* The images we will create are actually based on the RGBA non-sRGB
-       * version of the format.
-       */
-      format = _mesa_format_fallback_rgbx_to_rgba(format);
-      format = _mesa_get_srgb_format_linear(format);
-      enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
-      assert(isl_format_supports_rendering(devinfo, isl_format));
-   }
-#endif
-
-   return true;
-}
-
-static GLboolean
-brw_query_dma_buf_formats(__DRIscreen *_screen, int max,
-                          int *formats, int *count)
-{
-   struct brw_screen *screen = _screen->driverPrivate;
-   int num_formats = 0, i;
-
-   for (i = 0; i < ARRAY_SIZE(brw_image_formats); i++) {
-      /* These formats are valid DRI formats but do not exist in drm_fourcc.h
-       * in the Linux kernel. We don't want to accidentally advertise them
-       * through the EGL layer. 
- */ - if (brw_image_formats[i].fourcc == __DRI_IMAGE_FOURCC_SARGB8888 || - brw_image_formats[i].fourcc == __DRI_IMAGE_FOURCC_SABGR8888 || - brw_image_formats[i].fourcc == __DRI_IMAGE_FOURCC_SXRGB8888) - continue; - - if (!brw_image_format_is_supported(&screen->devinfo, - &brw_image_formats[i])) - continue; - - num_formats++; - if (max == 0) - continue; - - formats[num_formats - 1] = brw_image_formats[i].fourcc; - if (num_formats >= max) - break; - } - - *count = num_formats; - return true; -} - -static GLboolean -brw_query_dma_buf_modifiers(__DRIscreen *_screen, int fourcc, int max, - uint64_t *modifiers, - unsigned int *external_only, - int *count) -{ - struct brw_screen *screen = _screen->driverPrivate; - const struct brw_image_format *f; - int num_mods = 0, i; - - f = brw_image_format_lookup(fourcc); - if (f == NULL) - return false; - - if (!brw_image_format_is_supported(&screen->devinfo, f)) - return false; - - for (i = 0; i < ARRAY_SIZE(supported_modifiers); i++) { - uint64_t modifier = supported_modifiers[i].modifier; - if (!modifier_is_supported(&screen->devinfo, f, 0, 0, modifier)) - continue; - - num_mods++; - if (max == 0) - continue; - - modifiers[num_mods - 1] = modifier; - if (num_mods >= max) - break; - } - - if (external_only != NULL) { - for (i = 0; i < num_mods && i < max; i++) { - if (f->components == __DRI_IMAGE_COMPONENTS_Y_U_V || - f->components == __DRI_IMAGE_COMPONENTS_Y_UV || - f->components == __DRI_IMAGE_COMPONENTS_AYUV || - f->components == __DRI_IMAGE_COMPONENTS_XYUV || - f->components == __DRI_IMAGE_COMPONENTS_Y_XUXV || - f->components == __DRI_IMAGE_COMPONENTS_Y_UXVX) { - external_only[i] = GL_TRUE; - } - else { - external_only[i] = GL_FALSE; - } - } - } - - *count = num_mods; - return true; -} - -static __DRIimage * -brw_from_planar(__DRIimage *parent, int plane, void *loaderPrivate) -{ - int width, height, offset, stride, size, dri_format; - __DRIimage *image; - - if (parent == NULL) - return NULL; - - width = parent->width; - height = parent->height; - - const struct brw_image_format *f = parent->planar_format; - - if (f && plane < f->nplanes) { - /* Use the planar format definition. */ - width >>= f->planes[plane].width_shift; - height >>= f->planes[plane].height_shift; - dri_format = f->planes[plane].dri_format; - int index = f->planes[plane].buffer_index; - offset = parent->offsets[index]; - stride = parent->strides[index]; - size = height * stride; - } else if (plane == 0) { - /* The only plane of a non-planar image: copy the parent definition - * directly. 
*/ - dri_format = parent->dri_format; - offset = parent->offset; - stride = parent->pitch; - size = height * stride; - } else if (plane == 1 && parent->modifier != DRM_FORMAT_MOD_INVALID && - isl_drm_modifier_has_aux(parent->modifier)) { - /* Auxiliary plane */ - dri_format = parent->dri_format; - offset = parent->aux_offset; - stride = parent->aux_pitch; - size = parent->aux_size; - } else { - return NULL; - } - - if (offset + size > parent->bo->size) { - _mesa_warning(NULL, "intel_from_planar: subimage out of bounds"); - return NULL; - } - - image = brw_allocate_image(parent->screen, dri_format, loaderPrivate); - if (image == NULL) - return NULL; - - image->bo = parent->bo; - brw_bo_reference(parent->bo); - image->modifier = parent->modifier; - - image->width = width; - image->height = height; - image->pitch = stride; - image->offset = offset; - - brw_image_warn_if_unaligned(image, __func__); - - return image; -} - -static const __DRIimageExtension brwImageExtension = { - .base = { __DRI_IMAGE, 19 }, - - .createImageFromName = brw_create_image_from_name, - .createImageFromRenderbuffer = brw_create_image_from_renderbuffer, - .destroyImage = brw_destroy_image, - .createImage = brw_create_image, - .queryImage = brw_query_image, - .dupImage = brw_dup_image, - .validateUsage = brw_validate_usage, - .createImageFromNames = brw_create_image_from_names, - .fromPlanar = brw_from_planar, - .createImageFromTexture = brw_create_image_from_texture, - .createImageFromFds = brw_create_image_from_fds, - .createImageFromDmaBufs = brw_create_image_from_dma_bufs, - .blitImage = NULL, - .getCapabilities = NULL, - .mapImage = brw_map_image, - .unmapImage = brw_unmap_image, - .createImageWithModifiers = brw_create_image_with_modifiers, - .createImageFromDmaBufs2 = brw_create_image_from_dma_bufs2, - .queryDmaBufFormats = brw_query_dma_buf_formats, - .queryDmaBufModifiers = brw_query_dma_buf_modifiers, - .queryDmaBufFormatModifierAttribs = brw_query_format_modifier_attribs, - .createImageWithModifiers2 = brw_create_image_with_modifiers2, -}; - -static int -brw_query_renderer_integer(__DRIscreen *dri_screen, - int param, unsigned int *value) -{ - const struct brw_screen *const screen = - (struct brw_screen *) dri_screen->driverPrivate; - - switch (param) { - case __DRI2_RENDERER_VENDOR_ID: - value[0] = 0x8086; - return 0; - case __DRI2_RENDERER_DEVICE_ID: - value[0] = screen->deviceID; - return 0; - case __DRI2_RENDERER_ACCELERATED: - value[0] = 1; - return 0; - case __DRI2_RENDERER_VIDEO_MEMORY: { - /* Once a batch uses more than 75% of the maximum mappable size, we - * assume that there's some fragmentation, and we start doing extra - * flushing, etc. That's the big cliff apps will care about. 
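-       * We therefore report the smaller of that mappable threshold and
-       * the total system RAM, in megabytes.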
- */ - const unsigned gpu_mappable_megabytes = - screen->aperture_threshold / (1024 * 1024); - - const long system_memory_pages = sysconf(_SC_PHYS_PAGES); - const long system_page_size = sysconf(_SC_PAGE_SIZE); - - if (system_memory_pages <= 0 || system_page_size <= 0) - return -1; - - const uint64_t system_memory_bytes = (uint64_t) system_memory_pages - * (uint64_t) system_page_size; - - const unsigned system_memory_megabytes = - (unsigned) (system_memory_bytes / (1024 * 1024)); - - value[0] = MIN2(system_memory_megabytes, gpu_mappable_megabytes); - return 0; - } - case __DRI2_RENDERER_UNIFIED_MEMORY_ARCHITECTURE: - value[0] = 1; - return 0; - case __DRI2_RENDERER_HAS_TEXTURE_3D: - value[0] = 1; - return 0; - case __DRI2_RENDERER_HAS_CONTEXT_PRIORITY: - value[0] = 0; - if (brw_hw_context_set_priority(screen->bufmgr, - 0, INTEL_CONTEXT_HIGH_PRIORITY) == 0) - value[0] |= __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_HIGH; - if (brw_hw_context_set_priority(screen->bufmgr, - 0, INTEL_CONTEXT_LOW_PRIORITY) == 0) - value[0] |= __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_LOW; - /* reset to default last, just in case */ - if (brw_hw_context_set_priority(screen->bufmgr, - 0, INTEL_CONTEXT_MEDIUM_PRIORITY) == 0) - value[0] |= __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_MEDIUM; - return 0; - case __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB: - value[0] = 1; - return 0; - default: - return driQueryRendererIntegerCommon(dri_screen, param, value); - } - - return -1; -} - -static int -brw_query_renderer_string(__DRIscreen *dri_screen, - int param, const char **value) -{ - const struct brw_screen *screen = - (struct brw_screen *) dri_screen->driverPrivate; - - switch (param) { - case __DRI2_RENDERER_VENDOR_ID: - value[0] = brw_vendor_string; - return 0; - case __DRI2_RENDERER_DEVICE_ID: - value[0] = brw_get_renderer_string(screen); - return 0; - default: - break; - } - - return -1; -} - -static void -brw_set_cache_funcs(__DRIscreen *dri_screen, - __DRIblobCacheSet set, __DRIblobCacheGet get) -{ - const struct brw_screen *const screen = - (struct brw_screen *) dri_screen->driverPrivate; - - if (!screen->disk_cache) - return; - - disk_cache_set_callbacks(screen->disk_cache, set, get); -} - -static const __DRI2rendererQueryExtension brwRendererQueryExtension = { - .base = { __DRI2_RENDERER_QUERY, 1 }, - - .queryInteger = brw_query_renderer_integer, - .queryString = brw_query_renderer_string -}; - -static const __DRIrobustnessExtension dri2Robustness = { - .base = { __DRI2_ROBUSTNESS, 1 } -}; - -static const __DRI2blobExtension brwBlobExtension = { - .base = { __DRI2_BLOB, 1 }, - .set_cache_funcs = brw_set_cache_funcs -}; - -static const __DRImutableRenderBufferDriverExtension brwMutableRenderBufferExtension = { - .base = { __DRI_MUTABLE_RENDER_BUFFER_DRIVER, 1 }, -}; - -static const __DRIextension *screenExtensions[] = { - &brwTexBufferExtension.base, - &brwFenceExtension.base, - &brwFlushExtension.base, - &brwImageExtension.base, - &brwRendererQueryExtension.base, - &brwMutableRenderBufferExtension.base, - &dri2ConfigQueryExtension.base, - &dri2NoErrorExtension.base, - &brwBlobExtension.base, - NULL -}; - -static const __DRIextension *brwRobustScreenExtensions[] = { - &brwTexBufferExtension.base, - &brwFenceExtension.base, - &brwFlushExtension.base, - &brwImageExtension.base, - &brwRendererQueryExtension.base, - &brwMutableRenderBufferExtension.base, - &dri2ConfigQueryExtension.base, - &dri2Robustness.base, - &dri2NoErrorExtension.base, - &brwBlobExtension.base, - NULL -}; - -static int -brw_get_param(struct brw_screen *screen, int 
param, int *value)
-{
-   int ret = 0;
-   struct drm_i915_getparam gp;
-
-   memset(&gp, 0, sizeof(gp));
-   gp.param = param;
-   gp.value = value;
-
-   if (drmIoctl(screen->fd, DRM_IOCTL_I915_GETPARAM, &gp) == -1) {
-      ret = -errno;
-      if (ret != -EINVAL)
-         _mesa_warning(NULL, "drm_i915_getparam: %d", ret);
-   }
-
-   return ret;
-}
-
-static bool
-brw_get_boolean(struct brw_screen *screen, int param)
-{
-   int value = 0;
-   return (brw_get_param(screen, param, &value) == 0) && value;
-}
-
-static int
-brw_get_integer(struct brw_screen *screen, int param)
-{
-   int value = -1;
-
-   if (brw_get_param(screen, param, &value) == 0)
-      return value;
-
-   return -1;
-}
-
-static void
-brw_destroy_screen(__DRIscreen *sPriv)
-{
-   struct brw_screen *screen = sPriv->driverPrivate;
-
-   brw_bufmgr_unref(screen->bufmgr);
-   driDestroyOptionInfo(&screen->optionCache);
-
-   disk_cache_destroy(screen->disk_cache);
-
-   ralloc_free(screen);
-   sPriv->driverPrivate = NULL;
-}
-
-
-/**
- * Create a gl_framebuffer and attach it to __DRIdrawable::driverPrivate.
- *
- * This implements driDriverAPI::createNewDrawable, which the DRI layer calls
- * when creating an EGLSurface, GLXDrawable, or GLXPixmap. Despite the name,
- * this does not allocate GPU memory.
- */
-static GLboolean
-brw_create_buffer(__DRIscreen *dri_screen,
-                  __DRIdrawable *driDrawPriv,
-                  const struct gl_config *mesaVis, GLboolean isPixmap)
-{
-   struct brw_renderbuffer *rb;
-   struct brw_screen *screen = (struct brw_screen *)
-      dri_screen->driverPrivate;
-   mesa_format rgbFormat;
-   unsigned num_samples =
-      brw_quantize_num_samples(screen, mesaVis->samples);
-
-   if (isPixmap)
-      return false;
-
-   struct gl_framebuffer *fb = CALLOC_STRUCT(gl_framebuffer);
-   if (!fb)
-      return false;
-
-   _mesa_initialize_window_framebuffer(fb, mesaVis);
-
-   if (screen->winsys_msaa_samples_override != -1) {
-      num_samples = screen->winsys_msaa_samples_override;
-      fb->Visual.samples = num_samples;
-   }
-
-   if (mesaVis->redBits == 16 && mesaVis->alphaBits > 0 && mesaVis->floatMode) {
-      rgbFormat = MESA_FORMAT_RGBA_FLOAT16;
-   } else if (mesaVis->redBits == 16 && mesaVis->floatMode) {
-      rgbFormat = MESA_FORMAT_RGBX_FLOAT16;
-   } else if (mesaVis->redBits == 10 && mesaVis->alphaBits > 0) {
-      rgbFormat = mesaVis->redMask == 0x3ff00000 ? MESA_FORMAT_B10G10R10A2_UNORM
-                                                 : MESA_FORMAT_R10G10B10A2_UNORM;
-   } else if (mesaVis->redBits == 10) {
-      rgbFormat = mesaVis->redMask == 0x3ff00000 ? MESA_FORMAT_B10G10R10X2_UNORM
-                                                 : MESA_FORMAT_R10G10B10X2_UNORM;
-   } else if (mesaVis->redBits == 5) {
-      rgbFormat = mesaVis->redMask == 0x1f ? MESA_FORMAT_R5G6B5_UNORM
-                                           : MESA_FORMAT_B5G6R5_UNORM;
-   } else if (mesaVis->alphaBits == 0) {
-      rgbFormat = mesaVis->redMask == 0xff ? MESA_FORMAT_R8G8B8X8_SRGB
-                                           : MESA_FORMAT_B8G8R8X8_SRGB;
-      fb->Visual.sRGBCapable = true;
-   } else if (mesaVis->sRGBCapable) {
-      rgbFormat = mesaVis->redMask == 0xff ? MESA_FORMAT_R8G8B8A8_SRGB
-                                           : MESA_FORMAT_B8G8R8A8_SRGB;
-      fb->Visual.sRGBCapable = true;
-   } else {
-      rgbFormat = mesaVis->redMask == 0xff ? 
MESA_FORMAT_R8G8B8A8_SRGB - : MESA_FORMAT_B8G8R8A8_SRGB; - fb->Visual.sRGBCapable = true; - } - - /* mesaVis->sRGBCapable was set, user is asking for sRGB */ - bool srgb_cap_set = mesaVis->redBits >= 8 && mesaVis->sRGBCapable; - - /* setup the hardware-based renderbuffers */ - rb = brw_create_winsys_renderbuffer(screen, rgbFormat, num_samples); - _mesa_attach_and_own_rb(fb, BUFFER_FRONT_LEFT, &rb->Base.Base); - rb->need_srgb = srgb_cap_set; - - if (mesaVis->doubleBufferMode) { - rb = brw_create_winsys_renderbuffer(screen, rgbFormat, num_samples); - _mesa_attach_and_own_rb(fb, BUFFER_BACK_LEFT, &rb->Base.Base); - rb->need_srgb = srgb_cap_set; - } - - /* - * Assert here that the gl_config has an expected depth/stencil bit - * combination: one of d24/s8, d16/s0, d0/s0. (See brw_init_screen(), - * which constructs the advertised configs.) - */ - if (mesaVis->depthBits == 24) { - assert(mesaVis->stencilBits == 8); - - if (screen->devinfo.has_hiz_and_separate_stencil) { - rb = brw_create_private_renderbuffer(screen, - MESA_FORMAT_Z24_UNORM_X8_UINT, - num_samples); - _mesa_attach_and_own_rb(fb, BUFFER_DEPTH, &rb->Base.Base); - rb = brw_create_private_renderbuffer(screen, MESA_FORMAT_S_UINT8, - num_samples); - _mesa_attach_and_own_rb(fb, BUFFER_STENCIL, &rb->Base.Base); - } else { - /* - * Use combined depth/stencil. Note that the renderbuffer is - * attached to two attachment points. - */ - rb = brw_create_private_renderbuffer(screen, - MESA_FORMAT_Z24_UNORM_S8_UINT, - num_samples); - _mesa_attach_and_own_rb(fb, BUFFER_DEPTH, &rb->Base.Base); - _mesa_attach_and_reference_rb(fb, BUFFER_STENCIL, &rb->Base.Base); - } - } - else if (mesaVis->depthBits == 16) { - assert(mesaVis->stencilBits == 0); - rb = brw_create_private_renderbuffer(screen, MESA_FORMAT_Z_UNORM16, - num_samples); - _mesa_attach_and_own_rb(fb, BUFFER_DEPTH, &rb->Base.Base); - } - else { - assert(mesaVis->depthBits == 0); - assert(mesaVis->stencilBits == 0); - } - - /* now add any/all software-based renderbuffers we may need */ - _swrast_add_soft_renderbuffers(fb, - false, /* never sw color */ - false, /* never sw depth */ - false, /* never sw stencil */ - mesaVis->accumRedBits > 0, - false /* never sw alpha */); - driDrawPriv->driverPrivate = fb; - - return true; -} - -static void -brw_destroy_buffer(__DRIdrawable *driDrawPriv) -{ - struct gl_framebuffer *fb = driDrawPriv->driverPrivate; - - _mesa_reference_framebuffer(&fb, NULL); -} - -static bool -brw_init_bufmgr(struct brw_screen *screen) -{ - __DRIscreen *dri_screen = screen->driScrnPriv; - - bool bo_reuse = false; - int bo_reuse_mode = driQueryOptioni(&screen->optionCache, "bo_reuse"); - switch (bo_reuse_mode) { - case DRI_CONF_BO_REUSE_DISABLED: - break; - case DRI_CONF_BO_REUSE_ALL: - bo_reuse = true; - break; - } - - screen->bufmgr = brw_bufmgr_get_for_fd(&screen->devinfo, dri_screen->fd, bo_reuse); - if (screen->bufmgr == NULL) { - fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n", - __func__, __LINE__); - return false; - } - screen->fd = brw_bufmgr_get_fd(screen->bufmgr); - - if (!brw_get_boolean(screen, I915_PARAM_HAS_EXEC_NO_RELOC)) { - fprintf(stderr, "[%s: %u] Kernel 3.9 required.\n", __func__, __LINE__); - return false; - } - - return true; -} - -static int -brw_detect_timestamp(struct brw_screen *screen) -{ - uint64_t dummy = 0, last = 0; - int upper, lower, loops; - - /* On 64bit systems, some old kernels trigger a hw bug resulting in the - * TIMESTAMP register being shifted and the low 32bits always zero. 
- * - * More recent kernels offer an interface to read the full 36bits - * everywhere. - */ - if (brw_reg_read(screen->bufmgr, TIMESTAMP | 1, &dummy) == 0) - return 3; - - /* Determine if we have a 32bit or 64bit kernel by inspecting the - * upper 32bits for a rapidly changing timestamp. - */ - if (brw_reg_read(screen->bufmgr, TIMESTAMP, &last)) - return 0; - - upper = lower = 0; - for (loops = 0; loops < 10; loops++) { - /* The TIMESTAMP should change every 80ns, so several round trips - * through the kernel should be enough to advance it. - */ - if (brw_reg_read(screen->bufmgr, TIMESTAMP, &dummy)) - return 0; - - upper += (dummy >> 32) != (last >> 32); - if (upper > 1) /* beware 32bit counter overflow */ - return 2; /* upper dword holds the low 32bits of the timestamp */ - - lower += (dummy & 0xffffffff) != (last & 0xffffffff); - if (lower > 1) - return 1; /* timestamp is unshifted */ - - last = dummy; - } - - /* No advancement? No timestamp! */ - return 0; -} - - /** - * Test if we can use MI_LOAD_REGISTER_MEM from an untrusted batchbuffer. - * - * Some combinations of hardware and kernel versions allow this feature, - * while others don't. Instead of trying to enumerate every case, just - * try and write a register and see if works. - */ -static bool -brw_detect_pipelined_register(struct brw_screen *screen, - int reg, uint32_t expected_value, bool reset) -{ - if (screen->devinfo.no_hw) - return false; - - struct brw_bo *results, *bo; - uint32_t *batch; - uint32_t offset = 0; - void *map; - bool success = false; - - /* Create a zero'ed temporary buffer for reading our results */ - results = brw_bo_alloc(screen->bufmgr, "registers", 4096, BRW_MEMZONE_OTHER); - if (results == NULL) - goto err; - - bo = brw_bo_alloc(screen->bufmgr, "batchbuffer", 4096, BRW_MEMZONE_OTHER); - if (bo == NULL) - goto err_results; - - map = brw_bo_map(NULL, bo, MAP_WRITE); - if (!map) - goto err_batch; - - batch = map; - - /* Write the register. */ - *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2); - *batch++ = reg; - *batch++ = expected_value; - - /* Save the register's value back to the buffer. */ - *batch++ = MI_STORE_REGISTER_MEM | (3 - 2); - *batch++ = reg; - struct drm_i915_gem_relocation_entry reloc = { - .offset = (char *) batch - (char *) map, - .delta = offset * sizeof(uint32_t), - .target_handle = results->gem_handle, - .read_domains = I915_GEM_DOMAIN_INSTRUCTION, - .write_domain = I915_GEM_DOMAIN_INSTRUCTION, - }; - *batch++ = reloc.presumed_offset + reloc.delta; - - /* And afterwards clear the register */ - if (reset) { - *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2); - *batch++ = reg; - *batch++ = 0; - } - - *batch++ = MI_BATCH_BUFFER_END; - - struct drm_i915_gem_exec_object2 exec_objects[2] = { - { - .handle = results->gem_handle, - }, - { - .handle = bo->gem_handle, - .relocation_count = 1, - .relocs_ptr = (uintptr_t) &reloc, - } - }; - - struct drm_i915_gem_execbuffer2 execbuf = { - .buffers_ptr = (uintptr_t) exec_objects, - .buffer_count = 2, - .batch_len = ALIGN((char *) batch - (char *) map, 8), - .flags = I915_EXEC_RENDER, - }; - - /* Don't bother with error checking - if the execbuf fails, the - * value won't be written and we'll just report that there's no access. - */ - drmIoctl(screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); - - /* Check whether the value got written. 
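-    * If the execbuf was rejected, the zero-filled results buffer will
-    * simply fail to match expected_value and we report no access.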
*/ - void *results_map = brw_bo_map(NULL, results, MAP_READ); - if (results_map) { - success = *((uint32_t *)results_map + offset) == expected_value; - brw_bo_unmap(results); - } - -err_batch: - brw_bo_unreference(bo); -err_results: - brw_bo_unreference(results); -err: - return success; -} - -static bool -brw_detect_pipelined_so(struct brw_screen *screen) -{ - const struct intel_device_info *devinfo = &screen->devinfo; - - /* Supposedly, Broadwell just works. */ - if (devinfo->ver >= 8) - return true; - - if (devinfo->ver <= 6) - return false; - - /* See the big explanation about command parser versions below */ - if (screen->cmd_parser_version >= (devinfo->verx10 == 75 ? 7 : 2)) - return true; - - /* We use SO_WRITE_OFFSET0 since you're supposed to write it (unlike the - * statistics registers), and we already reset it to zero before using it. - */ - return brw_detect_pipelined_register(screen, - GFX7_SO_WRITE_OFFSET(0), - 0x1337d0d0, - false); -} - -/** - * Return array of MSAA modes supported by the hardware. The array is - * zero-terminated and sorted in decreasing order. - */ -const int* -brw_supported_msaa_modes(const struct brw_screen *screen) -{ - static const int gfx9_modes[] = {16, 8, 4, 2, 0, -1}; - static const int gfx8_modes[] = {8, 4, 2, 0, -1}; - static const int gfx7_modes[] = {8, 4, 0, -1}; - static const int gfx6_modes[] = {4, 0, -1}; - static const int gfx4_modes[] = {0, -1}; - - if (screen->devinfo.ver >= 9) { - return gfx9_modes; - } else if (screen->devinfo.ver >= 8) { - return gfx8_modes; - } else if (screen->devinfo.ver >= 7) { - return gfx7_modes; - } else if (screen->devinfo.ver == 6) { - return gfx6_modes; - } else { - return gfx4_modes; - } -} - -static unsigned -brw_loader_get_cap(const __DRIscreen *dri_screen, enum dri_loader_cap cap) -{ - if (dri_screen->dri2.loader && dri_screen->dri2.loader->base.version >= 4 && - dri_screen->dri2.loader->getCapability) - return dri_screen->dri2.loader->getCapability(dri_screen->loaderPrivate, cap); - - if (dri_screen->image.loader && dri_screen->image.loader->base.version >= 2 && - dri_screen->image.loader->getCapability) - return dri_screen->image.loader->getCapability(dri_screen->loaderPrivate, cap); - - return 0; -} - -static bool -brw_allowed_format(__DRIscreen *dri_screen, mesa_format format) -{ - struct brw_screen *screen = dri_screen->driverPrivate; - - /* Expose only BGRA ordering if the loader doesn't support RGBA ordering. */ - bool allow_rgba_ordering = brw_loader_get_cap(dri_screen, DRI_LOADER_CAP_RGBA_ORDERING); - if (!allow_rgba_ordering && - (format == MESA_FORMAT_R8G8B8A8_UNORM || - format == MESA_FORMAT_R8G8B8X8_UNORM || - format == MESA_FORMAT_R8G8B8A8_SRGB || - format == MESA_FORMAT_R8G8B8X8_SRGB)) - return false; - - /* Shall we expose 10 bpc formats? */ - bool allow_rgb10_configs = driQueryOptionb(&screen->optionCache, - "allow_rgb10_configs"); - if (!allow_rgb10_configs && - (format == MESA_FORMAT_B10G10R10A2_UNORM || - format == MESA_FORMAT_B10G10R10X2_UNORM)) - return false; - - /* Shall we expose 565 formats? */ - bool allow_rgb565_configs = driQueryOptionb(&screen->optionCache, - "allow_rgb565_configs"); - if (!allow_rgb565_configs && format == MESA_FORMAT_B5G6R5_UNORM) - return false; - - /* Shall we expose fp16 formats? 
*/ - bool allow_fp16_configs = brw_loader_get_cap(dri_screen, DRI_LOADER_CAP_FP16); - if (!allow_fp16_configs && - (format == MESA_FORMAT_RGBA_FLOAT16 || - format == MESA_FORMAT_RGBX_FLOAT16)) - return false; - - return true; -} - -static __DRIconfig** -brw_screen_make_configs(__DRIscreen *dri_screen) -{ - static const mesa_format formats[] = { - MESA_FORMAT_B5G6R5_UNORM, - MESA_FORMAT_B8G8R8A8_UNORM, - MESA_FORMAT_B8G8R8X8_UNORM, - - MESA_FORMAT_B8G8R8A8_SRGB, - MESA_FORMAT_B8G8R8X8_SRGB, - - /* For 10 bpc, 30 bit depth framebuffers. */ - MESA_FORMAT_B10G10R10A2_UNORM, - MESA_FORMAT_B10G10R10X2_UNORM, - - MESA_FORMAT_RGBA_FLOAT16, - MESA_FORMAT_RGBX_FLOAT16, - - /* The 32-bit RGBA format must not precede the 32-bit BGRA format. - * Likewise for RGBX and BGRX. Otherwise, the GLX client and the GLX - * server may disagree on which format the GLXFBConfig represents, - * resulting in swapped color channels. - * - * The problem, as of 2017-05-30: - * When matching a GLXFBConfig to a __DRIconfig, GLX ignores the channel - * order and chooses the first __DRIconfig with the expected channel - * sizes. Specifically, GLX compares the GLXFBConfig's and __DRIconfig's - * __DRI_ATTRIB_{CHANNEL}_SIZE but ignores __DRI_ATTRIB_{CHANNEL}_MASK. - * - * EGL does not suffer from this problem. It correctly compares the - * channel masks when matching EGLConfig to __DRIconfig. - */ - - /* Required by Android, for HAL_PIXEL_FORMAT_RGBA_8888. */ - MESA_FORMAT_R8G8B8A8_UNORM, - MESA_FORMAT_R8G8B8A8_SRGB, - - /* Required by Android, for HAL_PIXEL_FORMAT_RGBX_8888. */ - MESA_FORMAT_R8G8B8X8_UNORM, - MESA_FORMAT_R8G8B8X8_SRGB, - }; - - /* __DRI_ATTRIB_SWAP_COPY is not supported due to page flipping. */ - static const GLenum back_buffer_modes[] = { - __DRI_ATTRIB_SWAP_UNDEFINED, __DRI_ATTRIB_SWAP_NONE - }; - - static const uint8_t singlesample_samples[1] = {0}; - - struct brw_screen *screen = dri_screen->driverPrivate; - const struct intel_device_info *devinfo = &screen->devinfo; - uint8_t depth_bits[4], stencil_bits[4]; - __DRIconfig **configs = NULL; - - unsigned num_formats = ARRAY_SIZE(formats); - - /* Generate singlesample configs, each without accumulation buffer - * and with EGL_MUTABLE_RENDER_BUFFER_BIT_KHR. - */ - for (unsigned i = 0; i < num_formats; i++) { - __DRIconfig **new_configs; - int num_depth_stencil_bits = 1; - - if (!brw_allowed_format(dri_screen, formats[i])) - continue; - - /* Starting with DRI2 protocol version 1.1 we can request a depth/stencil - * buffer that has a different number of bits per pixel than the color - * buffer, gen >= 6 supports this. - */ - depth_bits[0] = 0; - stencil_bits[0] = 0; - - if (formats[i] == MESA_FORMAT_B5G6R5_UNORM) { - if (devinfo->ver >= 8) { - depth_bits[num_depth_stencil_bits] = 16; - stencil_bits[num_depth_stencil_bits] = 0; - num_depth_stencil_bits++; - } - if (devinfo->ver >= 6) { - depth_bits[num_depth_stencil_bits] = 24; - stencil_bits[num_depth_stencil_bits] = 8; - num_depth_stencil_bits++; - } - } else { - depth_bits[num_depth_stencil_bits] = 24; - stencil_bits[num_depth_stencil_bits] = 8; - num_depth_stencil_bits++; - } - - new_configs = driCreateConfigs(formats[i], - depth_bits, - stencil_bits, - num_depth_stencil_bits, - back_buffer_modes, 2, - singlesample_samples, 1, - false, false); - configs = driConcatConfigs(configs, new_configs); - } - - /* Generate the minimum possible set of configs that include an - * accumulation buffer. 
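-    * Each allowed format gets a single depth/stencil combination and a
-    * single sample count here, which keeps the config list small.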
- */ - for (unsigned i = 0; i < num_formats; i++) { - __DRIconfig **new_configs; - - if (!brw_allowed_format(dri_screen, formats[i])) - continue; - - if (formats[i] == MESA_FORMAT_B5G6R5_UNORM) { - if (devinfo->ver >= 8) { - depth_bits[0] = 16; - stencil_bits[0] = 0; - } else if (devinfo->ver >= 6) { - depth_bits[0] = 24; - stencil_bits[0] = 8; - } else { - depth_bits[0] = 0; - stencil_bits[0] = 0; - } - } else { - depth_bits[0] = 24; - stencil_bits[0] = 8; - } - - new_configs = driCreateConfigs(formats[i], - depth_bits, stencil_bits, 1, - back_buffer_modes, 1, - singlesample_samples, 1, - true, false); - configs = driConcatConfigs(configs, new_configs); - } - - /* Generate multisample configs. - * - * This loop breaks early, and hence is a no-op, on gen < 6. - * - * Multisample configs must follow the singlesample configs in order to - * work around an X server bug present in 1.12. The X server chooses to - * associate the first listed RGBA888-Z24S8 config, regardless of its - * sample count, with the 32-bit depth visual used for compositing. - * - * Only doublebuffer configs with GLX_SWAP_UNDEFINED_OML behavior are - * supported. Singlebuffer configs are not supported because no one wants - * them. - */ - for (unsigned i = 0; i < num_formats; i++) { - if (devinfo->ver < 6) - break; - - if (!brw_allowed_format(dri_screen, formats[i])) - continue; - - __DRIconfig **new_configs; - const int num_depth_stencil_bits = 2; - int num_msaa_modes = 0; - const uint8_t *multisample_samples = NULL; - - depth_bits[0] = 0; - stencil_bits[0] = 0; - - if (formats[i] == MESA_FORMAT_B5G6R5_UNORM && devinfo->ver >= 8) { - depth_bits[1] = 16; - stencil_bits[1] = 0; - } else { - depth_bits[1] = 24; - stencil_bits[1] = 8; - } - - if (devinfo->ver >= 9) { - static const uint8_t multisample_samples_gfx9[] = {2, 4, 8, 16}; - multisample_samples = multisample_samples_gfx9; - num_msaa_modes = ARRAY_SIZE(multisample_samples_gfx9); - } else if (devinfo->ver == 8) { - static const uint8_t multisample_samples_gfx8[] = {2, 4, 8}; - multisample_samples = multisample_samples_gfx8; - num_msaa_modes = ARRAY_SIZE(multisample_samples_gfx8); - } else if (devinfo->ver == 7) { - static const uint8_t multisample_samples_gfx7[] = {4, 8}; - multisample_samples = multisample_samples_gfx7; - num_msaa_modes = ARRAY_SIZE(multisample_samples_gfx7); - } else if (devinfo->ver == 6) { - static const uint8_t multisample_samples_gfx6[] = {4}; - multisample_samples = multisample_samples_gfx6; - num_msaa_modes = ARRAY_SIZE(multisample_samples_gfx6); - } - - new_configs = driCreateConfigs(formats[i], - depth_bits, - stencil_bits, - num_depth_stencil_bits, - back_buffer_modes, 1, - multisample_samples, - num_msaa_modes, - false, false); - configs = driConcatConfigs(configs, new_configs); - } - - if (configs == NULL) { - fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, - __LINE__); - return NULL; - } - - return configs; -} - -static void -set_max_gl_versions(struct brw_screen *screen) -{ - __DRIscreen *dri_screen = screen->driScrnPriv; - const bool has_astc = screen->devinfo.ver >= 9; - - switch (screen->devinfo.ver) { - case 11: - case 10: - case 9: - case 8: - dri_screen->max_gl_core_version = 46; - dri_screen->max_gl_compat_version = 30; - dri_screen->max_gl_es1_version = 11; - dri_screen->max_gl_es2_version = has_astc ? 
32 : 31; - break; - case 7: - dri_screen->max_gl_core_version = 33; - if (can_do_pipelined_register_writes(screen)) { - dri_screen->max_gl_core_version = 42; - if (screen->devinfo.platform == INTEL_PLATFORM_HSW && can_do_compute_dispatch(screen)) - dri_screen->max_gl_core_version = 43; - if (screen->devinfo.platform == INTEL_PLATFORM_HSW && can_do_mi_math_and_lrr(screen)) - dri_screen->max_gl_core_version = 45; - } - dri_screen->max_gl_compat_version = 30; - dri_screen->max_gl_es1_version = 11; - dri_screen->max_gl_es2_version = screen->devinfo.platform == INTEL_PLATFORM_HSW ? 31 : 30; - break; - case 6: - dri_screen->max_gl_core_version = 33; - dri_screen->max_gl_compat_version = 30; - dri_screen->max_gl_es1_version = 11; - dri_screen->max_gl_es2_version = 30; - break; - case 5: - case 4: - dri_screen->max_gl_core_version = 0; - dri_screen->max_gl_compat_version = 21; - dri_screen->max_gl_es1_version = 11; - dri_screen->max_gl_es2_version = 20; - break; - default: - unreachable("unrecognized brw_screen::gen"); - } - - /* OpenGL 3.3+ requires GL_ARB_blend_func_extended. Don't advertise those - * versions if driconf disables the extension. - */ - if (driQueryOptionb(&screen->optionCache, "disable_blend_func_extended")) { - dri_screen->max_gl_core_version = - MIN2(32, dri_screen->max_gl_core_version); - dri_screen->max_gl_compat_version = - MIN2(32, dri_screen->max_gl_compat_version); - } - - /* Using the `allow_higher_compat_version` option during context creation - * means that an application that doesn't request a specific version can be - * given a version higher than 3.0. However, an application still cannot - * request a higher version. For that to work, max_gl_compat_version must - * be set. - */ - if (dri_screen->max_gl_compat_version < dri_screen->max_gl_core_version) { - if (driQueryOptionb(&screen->optionCache, "allow_higher_compat_version")) - dri_screen->max_gl_compat_version = dri_screen->max_gl_core_version; - } -} - -static void -shader_debug_log_mesa(void *data, unsigned *msg_id, const char *fmt, ...) -{ - struct brw_context *brw = (struct brw_context *)data; - va_list args; - - va_start(args, fmt); - _mesa_gl_vdebugf(&brw->ctx, msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_OTHER, - MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args); - va_end(args); -} - -static void -shader_perf_log_mesa(void *data, unsigned *msg_id, const char *fmt, ...) -{ - struct brw_context *brw = (struct brw_context *)data; - - va_list args; - va_start(args, fmt); - - if (INTEL_DEBUG(DEBUG_PERF)) { - va_list args_copy; - va_copy(args_copy, args); - vfprintf(stderr, fmt, args_copy); - va_end(args_copy); - } - - if (brw->perf_debug) { - _mesa_gl_vdebugf(&brw->ctx, msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_PERFORMANCE, - MESA_DEBUG_SEVERITY_MEDIUM, fmt, args); - } - va_end(args); -} - -/** - * This is the driver specific part of the createNewScreen entry point. - * Called when using DRI2. - * - * \return the struct gl_config supported by this driver - */ -static const -__DRIconfig **brw_init_screen(__DRIscreen *dri_screen) -{ - struct brw_screen *screen; - - util_cpu_detect(); - - if (dri_screen->image.loader) { - } else if (dri_screen->dri2.loader->base.version <= 2 || - dri_screen->dri2.loader->getBuffersWithFormat == NULL) { - fprintf(stderr, - "\nERROR! DRI2 loader with getBuffersWithFormat() " - "support required\n"); - return NULL; - } - - /* Allocate the private area */ - screen = rzalloc(NULL, struct brw_screen); - if (!screen) { - fprintf(stderr, "\nERROR! 
Allocating private area failed\n"); - return NULL; - } - /* parse information in __driConfigOptions */ - driOptionCache options; - memset(&options, 0, sizeof(options)); - - driParseOptionInfo(&options, brw_driconf, ARRAY_SIZE(brw_driconf)); - driParseConfigFiles(&screen->optionCache, &options, dri_screen->myNum, - "i965", NULL, NULL, NULL, 0, NULL, 0); - driDestroyOptionCache(&options); - - screen->driScrnPriv = dri_screen; - dri_screen->driverPrivate = (void *) screen; - - if (!intel_get_device_info_from_fd(dri_screen->fd, &screen->devinfo)) - return NULL; - - const struct intel_device_info *devinfo = &screen->devinfo; - screen->deviceID = devinfo->chipset_id; - - if (devinfo->ver >= 12) { - fprintf(stderr, "gfx12 and newer are not supported on i965\n"); - return NULL; - } - - if (!brw_init_bufmgr(screen)) - return NULL; - - brw_process_intel_debug_variable(); - - if (INTEL_DEBUG(DEBUG_SHADER_TIME) && devinfo->ver < 7) { - fprintf(stderr, - "shader_time debugging requires gfx7 (Ivybridge) or better.\n"); - intel_debug &= ~DEBUG_SHADER_TIME; - } - - if (brw_get_integer(screen, I915_PARAM_MMAP_GTT_VERSION) >= 1) { - /* Theoretically unlimited! At least for individual objects... - * - * Currently the entire (global) address space for all GTT maps is - * limited to 64bits. That is, all objects on the system that are - * setup for GTT mmapping must fit within 64bits. An attempt to use - * one that exceeds the limit will fail in brw_bo_map_gtt(). - * - * Long before we hit that limit, we will be practically limited by - * the fact that any single object must fit in physical memory (RAM). - * The upper limit on the CPU's address space is currently 48bits - * (Skylake), of which only 39bits can be physical memory. (The GPU - * itself also has a 48bit addressable virtual space.) We can fit over - * 32 million objects of the current maximum allocable size before - * running out of mmap space. - */ - screen->max_gtt_map_object_size = UINT64_MAX; - } else { - /* Estimate the size of the mappable aperture into the GTT. There's an - * ioctl to get the whole GTT size, but not one to get the mappable subset. - * It turns out it's basically always 256MB, though some ancient hardware - * was smaller. - */ - uint32_t gtt_size = 256 * 1024 * 1024; - - /* We don't want to map two objects such that a memcpy between them would - * just fault one mapping in and then the other over and over forever. So - * we would need to divide the GTT size by 2. Additionally, some GTT is - * taken up by things like the framebuffer and the ringbuffer and such, so - * be more conservative. - */ - screen->max_gtt_map_object_size = gtt_size / 4; - } - - screen->aperture_threshold = devinfo->aperture_bytes * 3 / 4; - - screen->hw_has_timestamp = brw_detect_timestamp(screen); - - isl_device_init(&screen->isl_dev, &screen->devinfo); - - /* Gfx7-7.5 kernel requirements / command parser saga: - * - * - pre-v3.16: - * Haswell and Baytrail cannot use any privileged batchbuffer features. - * - * Ivybridge has aliasing PPGTT on by default, which accidentally marks - * all batches secure, allowing them to use any feature with no checking. - * This is effectively equivalent to a command parser version of - * \infinity - everything is possible. - * - * The command parser does not exist, and querying the version will - * return -EINVAL. - * - * - v3.16: - * The kernel enables the command parser by default, for systems with - * aliasing PPGTT enabled (Ivybridge and Haswell).
However, the - * hardware checker is still enabled, so Haswell and Baytrail cannot - * do anything. - * - * Ivybridge goes from "everything is possible" to "only what the - * command parser allows" (if the user boots with i915.cmd_parser=0, - * then everything is possible again). We can only safely use features - * allowed by the supported command parser version. - * - * Annoyingly, I915_PARAM_CMD_PARSER_VERSION reports the static version - * implemented by the kernel, even if it's turned off. So, checking - * for version > 0 does not mean that you can write registers. We have - * to try it and see. The version does, however, indicate the age of - * the kernel. - * - * Instead of matching the hardware checker's behavior of converting - * privileged commands to MI_NOOP, it makes execbuf2 start returning - * -EINVAL, making it dangerous to try and use privileged features. - * - * Effective command parser versions: - * - Haswell: 0 (reporting 1, writes don't work) - * - Baytrail: 0 (reporting 1, writes don't work) - * - Ivybridge: 1 (enabled) or infinite (disabled) - * - * - v3.17: - * Baytrail aliasing PPGTT is enabled, making it like Ivybridge: - * effectively version 1 (enabled) or infinite (disabled). - * - * - v3.19: f1f55cc0556031c8ee3fe99dae7251e78b9b653b - * Command parser v2 supports predicate writes. - * - * - Haswell: 0 (reporting 1, writes don't work) - * - Baytrail: 2 (enabled) or infinite (disabled) - * - Ivybridge: 2 (enabled) or infinite (disabled) - * - * So version >= 2 is enough to know that Ivybridge and Baytrail - * will work. Haswell still can't do anything. - * - * - v4.0: Version 3 happened. Largely not relevant. - * - * - v4.1: 6702cf16e0ba8b0129f5aa1b6609d4e9c70bc13b - * L3 config registers are properly saved and restored as part - * of the hardware context. We can approximately detect this point - * in time by checking if I915_PARAM_REVISION is recognized - it - * landed in a later commit, but in the same release cycle. - * - * - v4.2: 245054a1fe33c06ad233e0d58a27ec7b64db9284 - * Command parser finally gains secure batch promotion. On Haswell, - * the hardware checker gets disabled, which finally allows it to do - * privileged commands. - * - * I915_PARAM_CMD_PARSER_VERSION reports 3. Effective versions: - * - Haswell: 3 (enabled) or 0 (disabled) - * - Baytrail: 3 (enabled) or infinite (disabled) - * - Ivybridge: 3 (enabled) or infinite (disabled) - * - * Unfortunately, detecting this point in time is tricky, because - * no version bump happened when this important change occurred. - * On Haswell, if we can write any register, then the kernel is at - * least this new, and we can start trusting the version number. - * - * - v4.4: 2bbe6bbb0dc94fd4ce287bdac9e1bd184e23057b and - * Command parser reaches version 4, allowing access to Haswell - * atomic scratch and chicken3 registers. If version >= 4, we know - * the kernel is new enough to support privileged features on all - * hardware. However, the user might have disabled it...and the - * kernel will still report version 4. So we still have to guess - * and check. - * - * - v4.4: 7b9748cb513a6bef4af87b79f0da3ff7e8b56cd8 - * Command parser v5 whitelists indirect compute shader dispatch - * registers, needed for OpenGL 4.3 and later. - * - * - v4.8: - * Command parser v7 lets us use MI_MATH on Haswell. - * - * Additionally, the kernel begins reporting version 0 when - * the command parser is disabled, allowing us to skip the - * guess-and-check step on Haswell. 
Unfortunately, this also - * means that we can no longer use it as an indicator of the - * age of the kernel. - */ - if (brw_get_param(screen, I915_PARAM_CMD_PARSER_VERSION, - &screen->cmd_parser_version) < 0) { - /* Command parser does not exist - getparam is unrecognized */ - screen->cmd_parser_version = 0; - } - - /* Kernel 4.13 required for exec object capture */ - if (brw_get_boolean(screen, I915_PARAM_HAS_EXEC_CAPTURE)) { - screen->kernel_features |= KERNEL_ALLOWS_EXEC_CAPTURE; - } - - if (brw_get_boolean(screen, I915_PARAM_HAS_EXEC_BATCH_FIRST)) { - screen->kernel_features |= KERNEL_ALLOWS_EXEC_BATCH_FIRST; - } - - if (!brw_detect_pipelined_so(screen)) { - /* We can't do anything, so the effective version is 0. */ - screen->cmd_parser_version = 0; - } else { - screen->kernel_features |= KERNEL_ALLOWS_SOL_OFFSET_WRITES; - } - - if (devinfo->ver >= 8 || screen->cmd_parser_version >= 2) - screen->kernel_features |= KERNEL_ALLOWS_PREDICATE_WRITES; - - /* Haswell requires command parser version 4 in order to have L3 - * atomic scratch1 and chicken3 bits - */ - if (devinfo->verx10 == 75 && screen->cmd_parser_version >= 4) { - screen->kernel_features |= - KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3; - } - - /* Haswell requires command parser version 6 in order to write to the - * MI_MATH GPR registers, and version 7 in order to use - * MI_LOAD_REGISTER_REG (which all users of MI_MATH use). - */ - if (devinfo->ver >= 8 || - (devinfo->verx10 == 75 && screen->cmd_parser_version >= 7)) { - screen->kernel_features |= KERNEL_ALLOWS_MI_MATH_AND_LRR; - } - - /* Gfx7 needs at least command parser version 5 to support compute */ - if (devinfo->ver >= 8 || screen->cmd_parser_version >= 5) - screen->kernel_features |= KERNEL_ALLOWS_COMPUTE_DISPATCH; - - if (brw_get_boolean(screen, I915_PARAM_HAS_CONTEXT_ISOLATION)) - screen->kernel_features |= KERNEL_ALLOWS_CONTEXT_ISOLATION; - - const char *force_msaa = getenv("INTEL_FORCE_MSAA"); - if (force_msaa) { - screen->winsys_msaa_samples_override = - brw_quantize_num_samples(screen, atoi(force_msaa)); - printf("Forcing winsys sample count to %d\n", - screen->winsys_msaa_samples_override); - } else { - screen->winsys_msaa_samples_override = -1; - } - - set_max_gl_versions(screen); - - /* Notification of GPU resets requires hardware contexts and a kernel new - * enough to support DRM_IOCTL_I915_GET_RESET_STATS. If the ioctl is - * supported, calling it with a context of 0 will either generate EPERM or - * no error. If the ioctl is not supported, it always generates EINVAL. - * Use this to determine whether to advertise the __DRI2_ROBUSTNESS - * extension to the loader. - * - * Don't even try on pre-Gfx6, since we don't attempt to use contexts there. - */ - if (devinfo->ver >= 6) { - struct drm_i915_reset_stats stats; - memset(&stats, 0, sizeof(stats)); - - const int ret = drmIoctl(screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats); - - screen->has_context_reset_notification = - (ret != -1 || errno != EINVAL); - } - - dri_screen->extensions = !screen->has_context_reset_notification - ? screenExtensions : brwRobustScreenExtensions; - - screen->compiler = brw_compiler_create(screen, devinfo); - screen->compiler->shader_debug_log = shader_debug_log_mesa; - screen->compiler->shader_perf_log = shader_perf_log_mesa; - - /* Changing the meaning of constant buffer pointers from a dynamic state - * offset to an absolute address is only safe if the kernel isolates other - * contexts from our changes.
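For reference, the parser-version probe that all of the feature bits above key off of reduces to a single GETPARAM ioctl. A minimal standalone sketch against the kernel's i915 uAPI (assuming brw_get_param() wraps something close to this; error handling trimmed):

   #include <sys/ioctl.h>
   #include <xf86drm.h>
   #include "drm-uapi/i915_drm.h"

   /* Query the command parser version; 0 means "no parser" (pre-v3.16). */
   static int
   get_cmd_parser_version(int fd)
   {
      int value = 0;
      struct drm_i915_getparam gp = {
         .param = I915_PARAM_CMD_PARSER_VERSION,
         .value = &value,
      };

      if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) != 0)
         return 0; /* getparam unrecognized: parser does not exist */

      return value;
   }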
- */ - screen->compiler->constant_buffer_0_is_relative = devinfo->ver < 8 || - !(screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION); - - screen->compiler->glsl_compiler_options[MESA_SHADER_VERTEX].PositionAlwaysInvariant = driQueryOptionb(&screen->optionCache, "vs_position_always_invariant"); - screen->compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].PositionAlwaysPrecise = driQueryOptionb(&screen->optionCache, "vs_position_always_precise"); - - screen->compiler->supports_pull_constants = true; - screen->compiler->compact_params = true; - screen->compiler->lower_variable_group_size = true; - - screen->has_exec_fence = - brw_get_boolean(screen, I915_PARAM_HAS_EXEC_FENCE); - - brw_screen_init_surface_formats(screen); - - if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_SUBMIT)) { - unsigned int caps = brw_get_integer(screen, I915_PARAM_HAS_SCHEDULER); - if (caps) { - fprintf(stderr, "Kernel scheduler detected: %08x\n", caps); - if (caps & I915_SCHEDULER_CAP_PRIORITY) - fprintf(stderr, " - User priority sorting enabled\n"); - if (caps & I915_SCHEDULER_CAP_PREEMPTION) - fprintf(stderr, " - Preemption enabled\n"); - } - } - - brw_disk_cache_init(screen); - - return (const __DRIconfig**) brw_screen_make_configs(dri_screen); -} - -struct brw_buffer { - __DRIbuffer base; - struct brw_bo *bo; -}; - -static __DRIbuffer * -brw_allocate_buffer(__DRIscreen *dri_screen, - unsigned attachment, unsigned format, - int width, int height) -{ - struct brw_screen *screen = dri_screen->driverPrivate; - - assert(attachment == __DRI_BUFFER_FRONT_LEFT || - attachment == __DRI_BUFFER_BACK_LEFT); - - struct brw_buffer *buffer = calloc(1, sizeof *buffer); - if (buffer == NULL) - return NULL; - - /* The front and back buffers are color buffers, which are X tiled. GFX9+ - * supports Y tiled and compressed buffers, but there is no way to plumb that - * through to here. 
*/ - uint32_t pitch; - int cpp = format / 8; - buffer->bo = brw_bo_alloc_tiled_2d(screen->bufmgr, - __func__, - width, - height, - cpp, - BRW_MEMZONE_OTHER, - I915_TILING_X, &pitch, - BO_ALLOC_BUSY); - - if (buffer->bo == NULL) { - free(buffer); - return NULL; - } - - brw_bo_flink(buffer->bo, &buffer->base.name); - - buffer->base.attachment = attachment; - buffer->base.cpp = cpp; - buffer->base.pitch = pitch; - - return &buffer->base; -} - -static void -brw_release_buffer(UNUSED __DRIscreen *dri_screen, __DRIbuffer *_buffer) -{ - struct brw_buffer *buffer = (struct brw_buffer *) _buffer; - - brw_bo_unreference(buffer->bo); - free(buffer); -} - -static const struct __DriverAPIRec brw_driver_api = { - .InitScreen = brw_init_screen, - .DestroyScreen = brw_destroy_screen, - .CreateContext = brw_create_context, - .DestroyContext = brw_destroy_context, - .CreateBuffer = brw_create_buffer, - .DestroyBuffer = brw_destroy_buffer, - .MakeCurrent = brw_make_current, - .UnbindContext = brw_unbind_context, - .AllocateBuffer = brw_allocate_buffer, - .ReleaseBuffer = brw_release_buffer -}; - -static const struct __DRIDriverVtableExtensionRec brw_vtable = { - .base = { __DRI_DRIVER_VTABLE, 1 }, - .vtable = &brw_driver_api, -}; - -static const __DRIextension *brw_driver_extensions[] = { - &driCoreExtension.base, - &driImageDriverExtension.base, - &driDRI2Extension.base, - &brw_vtable.base, - &brw_config_options.base, - NULL -}; - -PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void) -{ - globalDriverAPI = &brw_driver_api; - - return brw_driver_extensions; -} diff --git a/src/mesa/drivers/dri/i965/brw_screen.h b/src/mesa/drivers/dri/i965/brw_screen.h deleted file mode 100644 index b68c2ac..0000000 --- a/src/mesa/drivers/dri/i965/brw_screen.h +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef _INTEL_INIT_H_ -#define _INTEL_INIT_H_ - -#include <stdbool.h> -#include <sys/time.h> - -#include <GL/internal/dri_interface.h> - -#include "isl/isl.h" -#include "dri_util.h" -#include "brw_bufmgr.h" -#include "dev/intel_device_info.h" -#include "drm-uapi/i915_drm.h" -#include "util/xmlconfig.h" - -#include "isl/isl.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct brw_screen -{ - int deviceID; - struct intel_device_info devinfo; - - __DRIscreen *driScrnPriv; - - uint64_t max_gtt_map_object_size; - - /** Bytes of aperture usage beyond which execbuf is likely to fail.
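The PUBLIC __driDriverGetExtensions_i965() entry point above is what DRI loaders resolve by name at runtime; roughly as in the sketch below (editor's illustration of the generic loader side, not code from this tree; the library name and error handling are simplified):

   #include <dlfcn.h>

   typedef struct __DRIextensionRec __DRIextension; /* from dri_interface.h */
   typedef const __DRIextension **(*get_extensions_fn)(void);

   /* Sketch: how a loader binds a driver's extension list by symbol name. */
   static const __DRIextension **
   load_i965_extensions(void)
   {
      void *handle = dlopen("i965_dri.so", RTLD_NOW | RTLD_GLOBAL);
      if (!handle)
         return NULL;

      get_extensions_fn get_extensions =
         (get_extensions_fn) dlsym(handle, "__driDriverGetExtensions_i965");

      return get_extensions ? get_extensions() : NULL;
   }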
*/ - uint64_t aperture_threshold; - - /** DRM fd associated with this screen. Not owned by this object. Do not close. */ - int fd; - - bool has_exec_fence; /**< I915_PARAM_HAS_EXEC_FENCE */ - - int hw_has_timestamp; - - struct isl_device isl_dev; - - /** - * Does the kernel support context reset notifications? - */ - bool has_context_reset_notification; - - /** - * Does the kernel support features such as pipelined register access to - * specific registers? - */ - unsigned kernel_features; -#define KERNEL_ALLOWS_SOL_OFFSET_WRITES (1<<0) -#define KERNEL_ALLOWS_PREDICATE_WRITES (1<<1) -#define KERNEL_ALLOWS_MI_MATH_AND_LRR (1<<2) -#define KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3 (1<<3) -#define KERNEL_ALLOWS_COMPUTE_DISPATCH (1<<4) -#define KERNEL_ALLOWS_EXEC_CAPTURE (1<<5) -#define KERNEL_ALLOWS_EXEC_BATCH_FIRST (1<<6) -#define KERNEL_ALLOWS_CONTEXT_ISOLATION (1<<7) - - struct brw_bufmgr *bufmgr; - - /** - * A unique ID for shader programs. - */ - unsigned program_id; - - int winsys_msaa_samples_override; - - struct brw_compiler *compiler; - - /** - * Configuration cache with default values for all contexts - */ - driOptionCache optionCache; - - /** - * Version of the command parser reported by the - * I915_PARAM_CMD_PARSER_VERSION parameter - */ - int cmd_parser_version; - - bool mesa_format_supports_texture[MESA_FORMAT_COUNT]; - bool mesa_format_supports_render[MESA_FORMAT_COUNT]; - enum isl_format mesa_to_isl_render_format[MESA_FORMAT_COUNT]; - - struct disk_cache *disk_cache; -}; - -extern void brw_destroy_context(__DRIcontext *driContextPriv); - -extern GLboolean brw_unbind_context(__DRIcontext *driContextPriv); - -PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void); -extern const __DRI2fenceExtension brwFenceExtension; - -extern GLboolean -brw_make_current(__DRIcontext *driContextPriv, - __DRIdrawable *driDrawPriv, - __DRIdrawable *driReadPriv); - -double get_time(void); - -const int* -brw_supported_msaa_modes(const struct brw_screen *screen); - -static inline bool -can_do_pipelined_register_writes(const struct brw_screen *screen) -{ - return screen->kernel_features & KERNEL_ALLOWS_SOL_OFFSET_WRITES; -} - -static inline bool -can_do_hsw_l3_atomics(const struct brw_screen *screen) -{ - return screen->kernel_features & KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3; -} - -static inline bool -can_do_mi_math_and_lrr(const struct brw_screen *screen) -{ - return screen->kernel_features & KERNEL_ALLOWS_MI_MATH_AND_LRR; -} - -static inline bool -can_do_compute_dispatch(const struct brw_screen *screen) -{ - return screen->kernel_features & KERNEL_ALLOWS_COMPUTE_DISPATCH; -} - -static inline bool -can_do_predicate_writes(const struct brw_screen *screen) -{ - return screen->kernel_features & KERNEL_ALLOWS_PREDICATE_WRITES; -} - -static inline bool -can_do_exec_capture(const struct brw_screen *screen) -{ - return screen->kernel_features & KERNEL_ALLOWS_EXEC_CAPTURE; -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c deleted file mode 100644 index 8905033..0000000 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. 
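These one-line helpers are how the command-parser feature bits established in brw_screen.c get consumed around the driver; a hypothetical caller, for illustration (can_use_indirect_compute() is not a real function in this tree):

   /* Hypothetical consumer of the feature bits above (names from this header). */
   static bool
   can_use_indirect_compute(const struct brw_screen *screen)
   {
      /* Indirect dispatch needs both pipelined register writes and compute
       * dispatch whitelisting (command parser v5+ on Gfx7; see brw_screen.c). */
      return can_do_pipelined_register_writes(screen) &&
             can_do_compute_dispatch(screen);
   }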
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "compiler/nir/nir.h" -#include "main/macros.h" -#include "main/mtypes.h" -#include "main/enums.h" -#include "main/fbobject.h" -#include "main/state.h" - -#include "brw_batch.h" - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_util.h" -#include "brw_state.h" -#include "compiler/brw_eu.h" - -#include "util/ralloc.h" - -static void -compile_sf_prog(struct brw_context *brw, struct brw_sf_prog_key *key) -{ - const unsigned *program; - void *mem_ctx; - unsigned program_size; - - mem_ctx = ralloc_context(NULL); - - struct brw_sf_prog_data prog_data; - program = brw_compile_sf(brw->screen->compiler, mem_ctx, key, &prog_data, - &brw->vue_map_geom_out, &program_size); - - brw_upload_cache(&brw->cache, BRW_CACHE_SF_PROG, - key, sizeof(*key), - program, program_size, - &prog_data, sizeof(prog_data), - &brw->sf.prog_offset, &brw->sf.prog_data); - ralloc_free(mem_ctx); -} - -/* Calculate interpolants for triangle and line rasterization. - */ -void -brw_upload_sf_prog(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_sf_prog_key key; - - if (!brw_state_dirty(brw, - _NEW_BUFFERS | - _NEW_HINT | - _NEW_LIGHT | - _NEW_POINT | - _NEW_POLYGON | - _NEW_PROGRAM | - _NEW_TRANSFORM, - BRW_NEW_BLORP | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_REDUCED_PRIMITIVE | - BRW_NEW_VUE_MAP_GEOM_OUT)) - return; - - /* _NEW_BUFFERS */ - bool flip_y = ctx->DrawBuffer->FlipY; - - memset(&key, 0, sizeof(key)); - - /* Populate the key, noting state dependencies: - */ - /* BRW_NEW_VUE_MAP_GEOM_OUT */ - key.attrs = brw->vue_map_geom_out.slots_valid; - - /* BRW_NEW_REDUCED_PRIMITIVE */ - switch (brw->reduced_primitive) { - case GL_TRIANGLES: - /* NOTE: We just use the edgeflag attribute as an indicator that - * unfilled triangles are active. We don't actually do the - * edgeflag testing here, it is already done in the clip - * program. 
- */ - if (key.attrs & BITFIELD64_BIT(VARYING_SLOT_EDGE)) - key.primitive = BRW_SF_PRIM_UNFILLED_TRIS; - else - key.primitive = BRW_SF_PRIM_TRIANGLES; - break; - case GL_LINES: - key.primitive = BRW_SF_PRIM_LINES; - break; - case GL_POINTS: - key.primitive = BRW_SF_PRIM_POINTS; - break; - } - - /* _NEW_TRANSFORM */ - key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0); - - /* _NEW_POINT */ - key.do_point_sprite = ctx->Point.PointSprite; - if (key.do_point_sprite) { - key.point_sprite_coord_replace = ctx->Point.CoordReplace & 0xff; - } - if (brw->programs[MESA_SHADER_FRAGMENT]->info.inputs_read & - BITFIELD64_BIT(VARYING_SLOT_PNTC)) { - key.do_point_coord = 1; - } - - /* - * Window coordinates in a FBO are inverted, which means point - * sprite origin must be inverted, too. - */ - if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == flip_y) - key.sprite_origin_lower_left = true; - - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - if (wm_prog_data) { - key.contains_flat_varying = wm_prog_data->contains_flat_varying; - - STATIC_ASSERT(sizeof(key.interp_mode) == - sizeof(wm_prog_data->interp_mode)); - memcpy(key.interp_mode, wm_prog_data->interp_mode, - sizeof(key.interp_mode)); - } - - /* _NEW_LIGHT | _NEW_PROGRAM */ - key.do_twoside_color = _mesa_vertex_program_two_side_enabled(ctx); - - /* _NEW_POLYGON */ - if (key.do_twoside_color) { - /* If we're rendering to a FBO, we have to invert the polygon - * face orientation, just as we invert the viewport in - * sf_unit_create_from_key(). - */ - key.frontface_ccw = brw->polygon_front_bit != flip_y; - } - - if (!brw_search_cache(&brw->cache, BRW_CACHE_SF_PROG, &key, sizeof(key), - &brw->sf.prog_offset, &brw->sf.prog_data, true)) { - compile_sf_prog( brw, &key ); - } -} diff --git a/src/mesa/drivers/dri/i965/brw_state.c b/src/mesa/drivers/dri/i965/brw_state.c deleted file mode 100644 index 9901746..0000000 --- a/src/mesa/drivers/dri/i965/brw_state.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
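One detail worth noting in brw_upload_sf_prog() above: the memset() of the key is load-bearing, because program-cache lookups compare keys as raw bytes. A standalone sketch of the pitfall (hypothetical example_key type, not driver code):

   #include <stdbool.h>
   #include <stdint.h>
   #include <string.h>

   /* Hypothetical key type: padding between fields is part of the raw bytes. */
   struct example_key {
      uint8_t  primitive;
      /* 3 bytes of padding here on most ABIs */
      uint32_t attrs;
   };

   static bool
   keys_match(const struct example_key *a, const struct example_key *b)
   {
      /* Byte-wise comparison, as a cache keyed on raw bytes would do: without
       * memset(&key, 0, sizeof(key)) before filling in the fields, garbage in
       * the padding makes logically equal keys compare unequal and defeats
       * the cache. */
      return memcmp(a, b, sizeof(*a)) == 0;
   }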
- */ - -#include "main/context.h" -#include "main/macros.h" -#include "main/enums.h" -#include "main/dd.h" - -#include "brw_screen.h" -#include "brw_context.h" -#include "brw_defines.h" - -int -brw_translate_shadow_compare_func(GLenum func) -{ - /* GL specifies the result of shadow comparisons as: - * 1 if ref <op> texel, - * 0 otherwise. - * - * The hardware does: - * 0 if texel <op> ref, - * 1 otherwise. - * - * So, these look a bit strange because there's both a negation - * and swapping of the arguments involved. For example, GL_LESS - * (pass when ref < texel) maps to BRW_COMPAREFUNCTION_LEQUAL: the - * hardware yields 0 when texel <= ref, which is exactly when the - * GL comparison fails. - */ - switch (func) { - case GL_NEVER: - return BRW_COMPAREFUNCTION_ALWAYS; - case GL_LESS: - return BRW_COMPAREFUNCTION_LEQUAL; - case GL_LEQUAL: - return BRW_COMPAREFUNCTION_LESS; - case GL_GREATER: - return BRW_COMPAREFUNCTION_GEQUAL; - case GL_GEQUAL: - return BRW_COMPAREFUNCTION_GREATER; - case GL_NOTEQUAL: - return BRW_COMPAREFUNCTION_EQUAL; - case GL_EQUAL: - return BRW_COMPAREFUNCTION_NOTEQUAL; - case GL_ALWAYS: - return BRW_COMPAREFUNCTION_NEVER; - } - - unreachable("Invalid shadow comparison function."); -} - -int -brw_translate_compare_func(GLenum func) -{ - switch (func) { - case GL_NEVER: - return BRW_COMPAREFUNCTION_NEVER; - case GL_LESS: - return BRW_COMPAREFUNCTION_LESS; - case GL_LEQUAL: - return BRW_COMPAREFUNCTION_LEQUAL; - case GL_GREATER: - return BRW_COMPAREFUNCTION_GREATER; - case GL_GEQUAL: - return BRW_COMPAREFUNCTION_GEQUAL; - case GL_NOTEQUAL: - return BRW_COMPAREFUNCTION_NOTEQUAL; - case GL_EQUAL: - return BRW_COMPAREFUNCTION_EQUAL; - case GL_ALWAYS: - return BRW_COMPAREFUNCTION_ALWAYS; - } - - unreachable("Invalid comparison function."); -} - -int -brw_translate_stencil_op(GLenum op) -{ - switch (op) { - case GL_KEEP: - return BRW_STENCILOP_KEEP; - case GL_ZERO: - return BRW_STENCILOP_ZERO; - case GL_REPLACE: - return BRW_STENCILOP_REPLACE; - case GL_INCR: - return BRW_STENCILOP_INCRSAT; - case GL_DECR: - return BRW_STENCILOP_DECRSAT; - case GL_INCR_WRAP: - return BRW_STENCILOP_INCR; - case GL_DECR_WRAP: - return BRW_STENCILOP_DECR; - case GL_INVERT: - return BRW_STENCILOP_INVERT; - default: - return BRW_STENCILOP_ZERO; - } -} diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h deleted file mode 100644 index 17d9e54..0000000 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ /dev/null @@ -1,370 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_STATE_H -#define BRW_STATE_H - -#include "brw_context.h" - -#ifdef __cplusplus -extern "C" { -#endif - -enum intel_msaa_layout; - -extern const struct brw_tracked_state brw_blend_constant_color; -extern const struct brw_tracked_state brw_clip_unit; -extern const struct brw_tracked_state brw_vs_pull_constants; -extern const struct brw_tracked_state brw_tcs_pull_constants; -extern const struct brw_tracked_state brw_tes_pull_constants; -extern const struct brw_tracked_state brw_gs_pull_constants; -extern const struct brw_tracked_state brw_wm_pull_constants; -extern const struct brw_tracked_state brw_cs_pull_constants; -extern const struct brw_tracked_state brw_constant_buffer; -extern const struct brw_tracked_state brw_curbe_offsets; -extern const struct brw_tracked_state brw_binding_table_pointers; -extern const struct brw_tracked_state brw_depthbuffer; -extern const struct brw_tracked_state brw_recalculate_urb_fence; -extern const struct brw_tracked_state brw_sf_vp; -extern const struct brw_tracked_state brw_cs_texture_surfaces; -extern const struct brw_tracked_state brw_vs_ubo_surfaces; -extern const struct brw_tracked_state brw_vs_image_surfaces; -extern const struct brw_tracked_state brw_tcs_ubo_surfaces; -extern const struct brw_tracked_state brw_tcs_image_surfaces; -extern const struct brw_tracked_state brw_tes_ubo_surfaces; -extern const struct brw_tracked_state brw_tes_image_surfaces; -extern const struct brw_tracked_state brw_gs_ubo_surfaces; -extern const struct brw_tracked_state brw_gs_image_surfaces; -extern const struct brw_tracked_state brw_renderbuffer_surfaces; -extern const struct brw_tracked_state brw_renderbuffer_read_surfaces; -extern const struct brw_tracked_state brw_texture_surfaces; -extern const struct brw_tracked_state brw_wm_binding_table; -extern const struct brw_tracked_state brw_gs_binding_table; -extern const struct brw_tracked_state brw_tes_binding_table; -extern const struct brw_tracked_state brw_tcs_binding_table; -extern const struct brw_tracked_state brw_vs_binding_table; -extern const struct brw_tracked_state brw_wm_ubo_surfaces; -extern const struct brw_tracked_state brw_wm_image_surfaces; -extern const struct brw_tracked_state brw_cs_ubo_surfaces; -extern const struct brw_tracked_state brw_cs_image_surfaces; - -extern const struct brw_tracked_state brw_psp_urb_cbs; - -extern const struct brw_tracked_state brw_indices; -extern const struct brw_tracked_state brw_index_buffer; -extern const struct brw_tracked_state gfx7_cs_push_constants; -extern const struct brw_tracked_state gfx6_binding_table_pointers; -extern const struct brw_tracked_state gfx6_gs_binding_table; -extern const struct brw_tracked_state gfx6_renderbuffer_surfaces; -extern const struct brw_tracked_state gfx6_sampler_state; -extern const struct brw_tracked_state gfx6_sol_surface; -extern const struct brw_tracked_state gfx6_sf_vp; -extern const struct brw_tracked_state gfx6_urb; -extern const struct brw_tracked_state gfx7_l3_state; -extern const struct brw_tracked_state gfx7_push_constant_space; -extern const struct brw_tracked_state gfx7_urb; -extern const struct brw_tracked_state gfx8_pma_fix; -extern const struct brw_tracked_state brw_cs_work_groups_surface; - -void gfx4_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void gfx45_emit_raw_pipe_control(struct brw_context *brw, uint32_t 
flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void gfx5_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void gfx6_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void gfx7_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void gfx75_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void gfx8_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void gfx9_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); -void gfx11_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, - uint64_t imm); - -static inline bool -brw_state_dirty(const struct brw_context *brw, - GLuint mesa_flags, uint64_t brw_flags) -{ - return ((brw->NewGLState & mesa_flags) | - (brw->ctx.NewDriverState & brw_flags)) != 0; -} - -/* brw_binding_tables.c */ -void brw_upload_binding_table(struct brw_context *brw, - uint32_t packet_name, - const struct brw_stage_prog_data *prog_data, - struct brw_stage_state *stage_state); - -/* brw_misc_state.c */ -void brw_upload_invariant_state(struct brw_context *brw); -uint32_t -brw_depthbuffer_format(struct brw_context *brw); - -/* gfx8_depth_state.c */ -void gfx8_write_pma_stall_bits(struct brw_context *brw, - uint32_t pma_stall_bits); - -/* brw_disk_cache.c */ -void brw_disk_cache_init(struct brw_screen *screen); -bool brw_disk_cache_upload_program(struct brw_context *brw, - gl_shader_stage stage); -void brw_disk_cache_write_compute_program(struct brw_context *brw); -void brw_disk_cache_write_render_programs(struct brw_context *brw); - -/*********************************************************************** - * brw_state_upload.c - */ -void brw_upload_render_state(struct brw_context *brw); -void brw_render_state_finished(struct brw_context *brw); -void brw_upload_compute_state(struct brw_context *brw); -void brw_compute_state_finished(struct brw_context *brw); -void brw_init_state(struct brw_context *brw); -void brw_destroy_state(struct brw_context *brw); -void brw_emit_select_pipeline(struct brw_context *brw, - enum brw_pipeline pipeline); -void brw_enable_obj_preemption(struct brw_context *brw, bool enable); - -static inline void -brw_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline) -{ - if (unlikely(brw->last_pipeline != pipeline)) { - assert(pipeline < BRW_NUM_PIPELINES); - brw_emit_select_pipeline(brw, pipeline); - brw->last_pipeline = pipeline; - } -} - -/*********************************************************************** - * brw_program_cache.c - */ - -void brw_upload_cache(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_sz, - const void *data, - GLuint data_sz, - const void *aux, - GLuint aux_sz, - uint32_t *out_offset, void *out_aux); - -bool brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, - const void *key, GLuint key_size, uint32_t *inout_offset, - void *inout_aux, bool flag_state); - -const void *brw_find_previous_compile(struct brw_cache *cache, - enum brw_cache_id cache_id, - unsigned program_string_id); - -void brw_program_cache_check_size(struct brw_context *brw); - -void brw_init_caches( struct brw_context *brw ); -void 
brw_destroy_caches( struct brw_context *brw ); - -void brw_print_program_cache(struct brw_context *brw); - -enum brw_cache_id brw_stage_cache_id(gl_shader_stage stage); - -/* brw_batch.c */ -void brw_require_statebuffer_space(struct brw_context *brw, int size); -void *brw_state_batch(struct brw_context *brw, - int size, int alignment, uint32_t *out_offset); - -/* brw_wm_surface_state.c */ -uint32_t brw_get_surface_tiling_bits(uint32_t tiling); -uint32_t brw_get_surface_num_multisamples(unsigned num_samples); -enum isl_format brw_isl_format_for_mesa_format(mesa_format mesa_format); - -GLuint translate_tex_target(GLenum target); - -enum isl_format translate_tex_format(struct brw_context *brw, - mesa_format mesa_format, - GLenum srgb_decode); - -int brw_get_texture_swizzle(const struct gl_context *ctx, - const struct gl_texture_object *t); - -void brw_emit_buffer_surface_state(struct brw_context *brw, - uint32_t *out_offset, - struct brw_bo *bo, - unsigned buffer_offset, - unsigned surface_format, - unsigned buffer_size, - unsigned pitch, - unsigned reloc_flags); - -/* brw_sampler_state.c */ -void brw_emit_sampler_state(struct brw_context *brw, - uint32_t *sampler_state, - uint32_t batch_offset_for_sampler_state, - unsigned min_filter, - unsigned mag_filter, - unsigned mip_filter, - unsigned max_anisotropy, - unsigned address_rounding, - unsigned wrap_s, - unsigned wrap_t, - unsigned wrap_r, - unsigned base_level, - unsigned min_lod, - unsigned max_lod, - int lod_bias, - unsigned shadow_function, - bool non_normalized_coordinates, - uint32_t border_color_offset); - -/* gfx6_constant_state.c */ -void -brw_populate_constant_data(struct brw_context *brw, - const struct gl_program *prog, - const struct brw_stage_state *stage_state, - void *dst, - const uint32_t *param, - unsigned nr_params); -void -brw_upload_pull_constants(struct brw_context *brw, - GLbitfield64 brw_new_constbuf, - const struct gl_program *prog, - struct brw_stage_state *stage_state, - const struct brw_stage_prog_data *prog_data); -void -brw_upload_cs_push_constants(struct brw_context *brw, - const struct gl_program *prog, - const struct brw_cs_prog_data *cs_prog_data, - struct brw_stage_state *stage_state); - -/* gfx7_vs_state.c */ -void -gfx7_upload_constant_state(struct brw_context *brw, - const struct brw_stage_state *stage_state, - bool active, unsigned opcode); - -/* brw_clip.c */ -void brw_upload_clip_prog(struct brw_context *brw); - -/* brw_sf.c */ -void brw_upload_sf_prog(struct brw_context *brw); - -bool brw_is_drawing_points(const struct brw_context *brw); -bool brw_is_drawing_lines(const struct brw_context *brw); - -/* gfx7_l3_state.c */ -void -gfx7_restore_default_l3_config(struct brw_context *brw); - -static inline bool -use_state_point_size(const struct brw_context *brw) -{ - const struct gl_context *ctx = &brw->ctx; - - /* Section 14.4 (Points) of the OpenGL 4.5 specification says: - * - * "If program point size mode is enabled, the derived point size is - * taken from the (potentially clipped) shader built-in gl_PointSize - * written by: - * - * * the geometry shader, if active; - * * the tessellation evaluation shader, if active and no - * geometry shader is active; - * * the vertex shader, otherwise - * - * and clamped to the implementation-dependent point size range. If - * the value written to gl_PointSize is less than or equal to zero, - * or if no value was written to gl_PointSize, results are undefined. 
- * If program point size mode is disabled, the derived point size is - * specified with the command - * - * void PointSize(float size); - * - * size specifies the requested size of a point. The default value - * is 1.0." - * - * The rules for GLES come from the ES 3.2, OES_geometry_point_size, and - * OES_tessellation_point_size specifications. To summarize: if the last - * stage before rasterization is a GS or TES, then use gl_PointSize from - * the shader if written. Otherwise, use 1.0. If the last stage is a - * vertex shader, use gl_PointSize, or it is undefined. - * - * We can combine these rules into a single condition for both APIs. - * Using the state point size when the last shader stage doesn't write - * gl_PointSize satisfies GL's requirements, as it's undefined. Because - * ES doesn't have a PointSize() command, the state point size will - * remain 1.0, satisfying the ES default value in the GS/TES case, and - * the VS case (1.0 works for "undefined"). Mesa sets the program point - * mode flag to always-enabled in ES, so we can safely check that, and - * it'll be ignored for ES. - * - * _NEW_PROGRAM | _NEW_POINT - * BRW_NEW_VUE_MAP_GEOM_OUT - */ - return (!ctx->VertexProgram.PointSizeEnabled && !ctx->Point._Attenuated) || - (brw->vue_map_geom_out.slots_valid & VARYING_BIT_PSIZ) == 0; -} - -void brw_copy_pipeline_atoms(struct brw_context *brw, - enum brw_pipeline pipeline, - const struct brw_tracked_state **atoms, - int num_atoms); -void gfx4_init_atoms(struct brw_context *brw); -void gfx45_init_atoms(struct brw_context *brw); -void gfx5_init_atoms(struct brw_context *brw); -void gfx6_init_atoms(struct brw_context *brw); -void gfx7_init_atoms(struct brw_context *brw); -void gfx75_init_atoms(struct brw_context *brw); -void gfx8_init_atoms(struct brw_context *brw); -void gfx9_init_atoms(struct brw_context *brw); -void gfx11_init_atoms(struct brw_context *brw); - -static inline uint32_t -brw_mocs(const struct isl_device *dev, struct brw_bo *bo) -{ - return isl_mocs(dev, 0, bo && bo->external); -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c deleted file mode 100644 index 7a6a8cd..0000000 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ /dev/null @@ -1,789 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "brw_program.h" -#include "drivers/common/meta.h" -#include "brw_batch.h" -#include "brw_buffers.h" -#include "brw_vs.h" -#include "brw_ff_gs.h" -#include "brw_gs.h" -#include "brw_wm.h" -#include "brw_cs.h" -#include "genxml/genX_bits.h" -#include "main/framebuffer.h" - -void -brw_enable_obj_preemption(struct brw_context *brw, bool enable) -{ - ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo; - assert(devinfo->ver >= 9); - - if (enable == brw->object_preemption) - return; - - /* A fixed function pipe flush is required before modifying this field */ - brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH); - - bool replay_mode = enable ? - GFX9_REPLAY_MODE_MIDOBJECT : GFX9_REPLAY_MODE_MIDBUFFER; - - /* enable object level preemption */ - brw_load_register_imm32(brw, CS_CHICKEN1, - replay_mode | GFX9_REPLAY_MODE_MASK); - - brw->object_preemption = enable; -} - -static void -brw_upload_gfx11_slice_hashing_state(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - int subslices_delta = - devinfo->ppipe_subslices[0] - devinfo->ppipe_subslices[1]; - if (subslices_delta == 0) - return; - - unsigned size = GFX11_SLICE_HASH_TABLE_length * 4; - uint32_t hash_address; - - uint32_t *map = brw_state_batch(brw, size, 64, &hash_address); - - unsigned idx = 0; - - unsigned sl_small = 0; - unsigned sl_big = 1; - if (subslices_delta > 0) { - sl_small = 1; - sl_big = 0; - } - - /** - * Create a 16x16 slice hashing table like the following one: - * - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ] - * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ] - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ] - * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ] - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ] - * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ] - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ] - * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ] - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ] - * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ] - * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ] - * - * The table above is used when the pixel pipe 0 has less subslices than - * pixel pipe 1. When pixel pipe 0 has more subslices, then a similar table - * with 0's and 1's inverted is used. - */ - for (int i = 0; i < GFX11_SLICE_HASH_TABLE_length; i++) { - uint32_t dw = 0; - - for (int j = 0; j < 8; j++) { - unsigned slice = idx++ % 3 ? sl_big : sl_small; - dw |= slice << (j * 4); - } - map[i] = dw; - } - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SLICE_TABLE_STATE_POINTERS << 16 | (2 - 2)); - OUT_RELOC(brw->batch.state.bo, 0, hash_address | 1); - ADVANCE_BATCH(); - - /* From gfx10/gfx11 workaround table in h/w specs: - * - * "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1 - * a value of 0xFFFF" - * - * This means that whenever we update a field with this instruction, we need - * to update all the others. 
- * - * Since this is the first time we emit this - * instruction, we are only setting the fSLICE_HASHING_TABLE_ENABLE flag - * and leaving everything else at its default state (0). - */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2)); - OUT_BATCH(0xffff0000 | SLICE_HASHING_TABLE_ENABLE); - ADVANCE_BATCH(); -} - -static void -brw_upload_initial_gpu_state(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct brw_compiler *compiler = brw->screen->compiler; - - /* On platforms with hardware contexts, we can set our initial GPU state - * right away rather than doing it via state atoms. This saves a small - * amount of overhead on every draw call. - */ - if (!brw->hw_ctx) - return; - - if (devinfo->ver == 6) - brw_emit_post_sync_nonzero_flush(brw); - - brw_upload_invariant_state(brw); - - if (devinfo->ver == 11) { - /* Bit 5 "Headerless Message for Pre-emptable Contexts" in the SAMPLER - * MODE register defaults to 0, which means headerless sampler messages - * are not allowed for pre-emptable contexts. Set bit 5 to 1 to allow - * them. - */ - brw_load_register_imm32(brw, GFX11_SAMPLER_MODE, - HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK | - HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS); - - /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in the - * HALF_SLICE_CHICKEN7 register. - */ - brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7, - TEXEL_OFFSET_FIX_MASK | - TEXEL_OFFSET_FIX_ENABLE); - - /* Wa_1406697149: Bit 9 "Error Detection Behavior Control" must be set - * in the L3CNTLREG register. The default setting of the bit is not the - * desired behavior. - */ - brw_load_register_imm32(brw, GFX8_L3CNTLREG, - GFX8_L3CNTLREG_EDBC_NO_HANG); - } - - /* The hardware specification recommends disabling repacking for - * compatibility with the decompression mechanism in the display - * controller. - */ - if (devinfo->disable_ccs_repack) { - brw_load_register_imm32(brw, GFX7_CACHE_MODE_0, - GFX11_DISABLE_REPACKING_FOR_COMPRESSION | - REG_MASK(GFX11_DISABLE_REPACKING_FOR_COMPRESSION)); - } - - if (devinfo->ver == 9) { - /* Recommended optimizations for Victim Cache eviction and floating - * point blending. - */ - brw_load_register_imm32(brw, GFX7_CACHE_MODE_1, - REG_MASK(GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE) | - REG_MASK(GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT) | - REG_MASK(GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC) | - GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE | - GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT | - GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC); - } - - if (devinfo->ver >= 8) { - gfx8_emit_3dstate_sample_pattern(brw); - - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so - * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address. - * - * This is only safe on kernels with context isolation support.
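The REG_MASK() pattern used throughout the function above reflects that these are masked registers: the high 16 bits of the written value select which of the low 16 bits the write actually changes, so unrelated bits are left alone. A sketch, assuming i965's definition REG_MASK(x) == ((x) << 16):

   #include <stdint.h>

   #define EXAMPLE_BIT         (1 << 2)                    /* some single-bit field */
   #define EXAMPLE_REG_MASK(x) ((uint32_t)(x) << 16)       /* assumed; mirrors REG_MASK */

   /* Payload that sets the bit: mask in the high half, value in the low half. */
   static const uint32_t set_payload   = EXAMPLE_REG_MASK(EXAMPLE_BIT) | EXAMPLE_BIT;
   /* Payload that clears the bit: mask present, value bit absent. */
   static const uint32_t clear_payload = EXAMPLE_REG_MASK(EXAMPLE_BIT);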
- */ - if (!compiler->constant_buffer_0_is_relative) { - if (devinfo->ver >= 9) { - BEGIN_BATCH(3); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); - OUT_BATCH(CS_DEBUG_MODE2); - OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) | - CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE); - ADVANCE_BATCH(); - } else if (devinfo->ver == 8) { - BEGIN_BATCH(3); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); - OUT_BATCH(INSTPM); - OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) | - INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE); - ADVANCE_BATCH(); - } - } - - brw->object_preemption = false; - - if (devinfo->ver >= 10) - brw_enable_obj_preemption(brw, true); - - if (devinfo->ver == 11) - brw_upload_gfx11_slice_hashing_state(brw); -} - -static inline const struct brw_tracked_state * -brw_get_pipeline_atoms(struct brw_context *brw, - enum brw_pipeline pipeline) -{ - switch (pipeline) { - case BRW_RENDER_PIPELINE: - return brw->render_atoms; - case BRW_COMPUTE_PIPELINE: - return brw->compute_atoms; - default: - STATIC_ASSERT(BRW_NUM_PIPELINES == 2); - unreachable("Unsupported pipeline"); - return NULL; - } -} - -void -brw_copy_pipeline_atoms(struct brw_context *brw, - enum brw_pipeline pipeline, - const struct brw_tracked_state **atoms, - int num_atoms) -{ - /* This is to work around brw_context::atoms being declared const. We want - * it to be const, but it needs to be initialized somehow! - */ - struct brw_tracked_state *context_atoms = - (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline); - - for (int i = 0; i < num_atoms; i++) { - context_atoms[i] = *atoms[i]; - assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw); - assert(context_atoms[i].emit); - } - - brw->num_atoms[pipeline] = num_atoms; -} - -void brw_init_state( struct brw_context *brw ) -{ - struct gl_context *ctx = &brw->ctx; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* Force the first brw_select_pipeline to emit pipeline select */ - brw->last_pipeline = BRW_NUM_PIPELINES; - - brw_init_caches(brw); - - if (devinfo->ver >= 11) - gfx11_init_atoms(brw); - else if (devinfo->ver >= 10) - unreachable("Gfx10 support dropped."); - else if (devinfo->ver >= 9) - gfx9_init_atoms(brw); - else if (devinfo->ver >= 8) - gfx8_init_atoms(brw); - else if (devinfo->verx10 >= 75) - gfx75_init_atoms(brw); - else if (devinfo->ver >= 7) - gfx7_init_atoms(brw); - else if (devinfo->ver >= 6) - gfx6_init_atoms(brw); - else if (devinfo->ver >= 5) - gfx5_init_atoms(brw); - else if (devinfo->verx10 >= 45) - gfx45_init_atoms(brw); - else - gfx4_init_atoms(brw); - - brw_upload_initial_gpu_state(brw); - - brw->NewGLState = ~0; - brw->ctx.NewDriverState = ~0ull; - - /* ~0 is a nonsensical value which won't match anything we program, so - * the programming will take effect on the first time around. - */ - brw->pma_stall_bits = ~0; - - /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible - * dirty flags. 
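The ~0 seeding of pma_stall_bits above is the usual impossible-cached-value idiom: pick a sentinel the driver would never program so the first real update always mismatches. In isolation (editor's sketch with hypothetical names):

   #include <stdbool.h>
   #include <stdint.h>

   struct cached_reg {
      uint32_t value;
   };

   static void
   cached_reg_init(struct cached_reg *reg)
   {
      reg->value = ~0u; /* nonsensical: matches nothing we would program */
   }

   /* Returns true when the caller needs to emit the state packet. */
   static bool
   cached_reg_update(struct cached_reg *reg, uint32_t new_value)
   {
      if (reg->value == new_value)
         return false;        /* redundant; skip the hardware write */
      reg->value = new_value;
      return true;
   }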
- */ - STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState)); - - ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK; - ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK; - ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD; - ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER; - ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER; - ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER; - ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER; - ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS; - ctx->DriverFlags.NewTessState = BRW_NEW_DEFAULT_TESS_LEVELS; - ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION; -} - - -void brw_destroy_state( struct brw_context *brw ) -{ - brw_destroy_caches(brw); -} - -/*********************************************************************** - */ - -static bool -check_state(const struct brw_state_flags *a, const struct brw_state_flags *b) -{ - return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0; -} - -static void -accumulate_state(struct brw_state_flags *a, const struct brw_state_flags *b) -{ - a->mesa |= b->mesa; - a->brw |= b->brw; -} - - -static void -xor_states(struct brw_state_flags *result, - const struct brw_state_flags *a, - const struct brw_state_flags *b) -{ - result->mesa = a->mesa ^ b->mesa; - result->brw = a->brw ^ b->brw; -} - -struct dirty_bit_map { - uint64_t bit; - char *name; - uint32_t count; -}; - -#define DEFINE_BIT(name) {name, #name, 0} - -static struct dirty_bit_map mesa_bits[] = { - DEFINE_BIT(_NEW_MODELVIEW), - DEFINE_BIT(_NEW_PROJECTION), - DEFINE_BIT(_NEW_TEXTURE_MATRIX), - DEFINE_BIT(_NEW_COLOR), - DEFINE_BIT(_NEW_DEPTH), - DEFINE_BIT(_NEW_FOG), - DEFINE_BIT(_NEW_HINT), - DEFINE_BIT(_NEW_LIGHT), - DEFINE_BIT(_NEW_LINE), - DEFINE_BIT(_NEW_PIXEL), - DEFINE_BIT(_NEW_POINT), - DEFINE_BIT(_NEW_POLYGON), - DEFINE_BIT(_NEW_POLYGONSTIPPLE), - DEFINE_BIT(_NEW_SCISSOR), - DEFINE_BIT(_NEW_STENCIL), - DEFINE_BIT(_NEW_TEXTURE_OBJECT), - DEFINE_BIT(_NEW_TRANSFORM), - DEFINE_BIT(_NEW_VIEWPORT), - DEFINE_BIT(_NEW_TEXTURE_STATE), - DEFINE_BIT(_NEW_RENDERMODE), - DEFINE_BIT(_NEW_BUFFERS), - DEFINE_BIT(_NEW_CURRENT_ATTRIB), - DEFINE_BIT(_NEW_MULTISAMPLE), - DEFINE_BIT(_NEW_TRACK_MATRIX), - DEFINE_BIT(_NEW_PROGRAM), - DEFINE_BIT(_NEW_PROGRAM_CONSTANTS), - DEFINE_BIT(_NEW_FRAG_CLAMP), - {0, 0, 0} -}; - -static struct dirty_bit_map brw_bits[] = { - DEFINE_BIT(BRW_NEW_FS_PROG_DATA), - DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA), - DEFINE_BIT(BRW_NEW_SF_PROG_DATA), - DEFINE_BIT(BRW_NEW_VS_PROG_DATA), - DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA), - DEFINE_BIT(BRW_NEW_GS_PROG_DATA), - DEFINE_BIT(BRW_NEW_TCS_PROG_DATA), - DEFINE_BIT(BRW_NEW_TES_PROG_DATA), - DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA), - DEFINE_BIT(BRW_NEW_CS_PROG_DATA), - DEFINE_BIT(BRW_NEW_URB_FENCE), - DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM), - DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM), - DEFINE_BIT(BRW_NEW_TESS_PROGRAMS), - DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM), - DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE), - DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE), - DEFINE_BIT(BRW_NEW_PRIMITIVE), - DEFINE_BIT(BRW_NEW_CONTEXT), - DEFINE_BIT(BRW_NEW_PSP), - DEFINE_BIT(BRW_NEW_SURFACES), - DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS), - DEFINE_BIT(BRW_NEW_INDICES), - DEFINE_BIT(BRW_NEW_VERTICES), - DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS), - DEFINE_BIT(BRW_NEW_BATCH), - DEFINE_BIT(BRW_NEW_INDEX_BUFFER), - DEFINE_BIT(BRW_NEW_VS_CONSTBUF), - DEFINE_BIT(BRW_NEW_TCS_CONSTBUF), - 
DEFINE_BIT(BRW_NEW_TES_CONSTBUF), - DEFINE_BIT(BRW_NEW_GS_CONSTBUF), - DEFINE_BIT(BRW_NEW_PROGRAM_CACHE), - DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS), - DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT), - DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK), - DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD), - DEFINE_BIT(BRW_NEW_STATS_WM), - DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER), - DEFINE_BIT(BRW_NEW_IMAGE_UNITS), - DEFINE_BIT(BRW_NEW_META_IN_PROGRESS), - DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION), - DEFINE_BIT(BRW_NEW_NUM_SAMPLES), - DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER), - DEFINE_BIT(BRW_NEW_GFX4_UNIT_STATE), - DEFINE_BIT(BRW_NEW_CC_VP), - DEFINE_BIT(BRW_NEW_SF_VP), - DEFINE_BIT(BRW_NEW_CLIP_VP), - DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE), - DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS), - DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM), - DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS), - DEFINE_BIT(BRW_NEW_URB_SIZE), - DEFINE_BIT(BRW_NEW_CC_STATE), - DEFINE_BIT(BRW_NEW_BLORP), - DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT), - DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION), - DEFINE_BIT(BRW_NEW_DRAW_CALL), - DEFINE_BIT(BRW_NEW_AUX_STATE), - {0, 0, 0} -}; - -static void -brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits) -{ - for (int i = 0; bit_map[i].bit != 0; i++) { - if (bit_map[i].bit & bits) - bit_map[i].count++; - } -} - -static void -brw_print_dirty_count(struct dirty_bit_map *bit_map) -{ - for (int i = 0; bit_map[i].bit != 0; i++) { - if (bit_map[i].count > 1) { - fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n", - bit_map[i].bit, bit_map[i].count, bit_map[i].name); - } - } -} - -static inline void -brw_upload_tess_programs(struct brw_context *brw) -{ - if (brw->programs[MESA_SHADER_TESS_EVAL]) { - brw_upload_tcs_prog(brw); - brw_upload_tes_prog(brw); - } else { - brw->tcs.base.prog_data = NULL; - brw->tes.base.prog_data = NULL; - } -} - -static inline void -brw_upload_programs(struct brw_context *brw, - enum brw_pipeline pipeline) -{ - struct gl_context *ctx = &brw->ctx; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (pipeline == BRW_RENDER_PIPELINE) { - brw_upload_vs_prog(brw); - brw_upload_tess_programs(brw); - - if (brw->programs[MESA_SHADER_GEOMETRY]) { - brw_upload_gs_prog(brw); - } else { - brw->gs.base.prog_data = NULL; - if (devinfo->ver < 7) - brw_upload_ff_gs_prog(brw); - } - - /* Update the VUE map for data exiting the GS stage of the pipeline. - * This comes from the last enabled shader stage. - */ - GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid; - bool old_separate = brw->vue_map_geom_out.separate; - struct brw_vue_prog_data *vue_prog_data; - if (brw->programs[MESA_SHADER_GEOMETRY]) - vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data); - else if (brw->programs[MESA_SHADER_TESS_EVAL]) - vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data); - else - vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data); - - brw->vue_map_geom_out = vue_prog_data->vue_map; - - /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */ - if (old_slots != brw->vue_map_geom_out.slots_valid || - old_separate != brw->vue_map_geom_out.separate) - brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT; - - if ((old_slots ^ brw->vue_map_geom_out.slots_valid) & - VARYING_BIT_VIEWPORT) { - ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT; - brw->clip.viewport_count = - (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ? 
- ctx->Const.MaxViewports : 1; - } - - brw_upload_wm_prog(brw); - - if (devinfo->ver < 6) { - brw_upload_clip_prog(brw); - brw_upload_sf_prog(brw); - } - - brw_disk_cache_write_render_programs(brw); - } else if (pipeline == BRW_COMPUTE_PIPELINE) { - brw_upload_cs_prog(brw); - brw_disk_cache_write_compute_program(brw); - } -} - -static inline void -merge_ctx_state(struct brw_context *brw, - struct brw_state_flags *state) -{ - state->mesa |= brw->NewGLState; - state->brw |= brw->ctx.NewDriverState; -} - -static ALWAYS_INLINE void -check_and_emit_atom(struct brw_context *brw, - struct brw_state_flags *state, - const struct brw_tracked_state *atom) -{ - if (check_state(state, &atom->dirty)) { - atom->emit(brw); - merge_ctx_state(brw, state); - } -} - -static inline void -brw_upload_pipeline_state(struct brw_context *brw, - enum brw_pipeline pipeline) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - int i; - static int dirty_count = 0; - struct brw_state_flags state = brw->state.pipelines[pipeline]; - const unsigned fb_samples = - MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1); - - brw_select_pipeline(brw, pipeline); - - if (pipeline == BRW_RENDER_PIPELINE && brw->current_hash_scale != 1) - brw_emit_hashing_mode(brw, UINT_MAX, UINT_MAX, 1); - - if (INTEL_DEBUG(DEBUG_REEMIT)) { - /* Always re-emit all state. */ - brw->NewGLState = ~0; - ctx->NewDriverState = ~0ull; - } - - if (pipeline == BRW_RENDER_PIPELINE) { - if (brw->programs[MESA_SHADER_FRAGMENT] != - ctx->FragmentProgram._Current) { - brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM; - } - - if (brw->programs[MESA_SHADER_TESS_EVAL] != - ctx->TessEvalProgram._Current) { - brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS; - } - - if (brw->programs[MESA_SHADER_TESS_CTRL] != - ctx->TessCtrlProgram._Current) { - brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS; - } - - if (brw->programs[MESA_SHADER_GEOMETRY] != - ctx->GeometryProgram._Current) { - brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM; - } - - if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) { - brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM; - } - } - - if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) { - brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current; - brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM; - } - - if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) { - brw->meta_in_progress = _mesa_meta_in_progress(ctx); - brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS; - } - - if (brw->num_samples != fb_samples) { - brw->num_samples = fb_samples; - brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES; - } - - /* Exit early if no state is flagged as dirty */ - merge_ctx_state(brw, &state); - if ((state.mesa | state.brw) == 0) - return; - - /* Emit Sandybridge workaround flushes on every primitive, for safety. 
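
The INTEL_DEBUG(DEBUG_ANY) branch in the next hunk enforces an ordering
invariant over the atom list: once an atom's dirty bits have been examined,
no later atom may newly raise them, or the earlier atom ran against stale
state. The check, distilled to a single 64-bit dirty word (emit_if_dirty and
listens_to are illustrative names, not driver symbols):

   uint64_t examined = 0; /* bits some earlier atom already looked at */

   for (int i = 0; i < n; i++) {
      uint64_t before = state;
      emit_if_dirty(&state, &atoms[i]);    /* may OR in new dirty bits */
      uint64_t generated = before ^ state; /* bits this atom just set */
      assert((examined & generated) == 0); /* else the list is mis-ordered */
      examined |= atoms[i].listens_to;
   }
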
*/ - if (devinfo->ver == 6) - brw_emit_post_sync_nonzero_flush(brw); - - brw_upload_programs(brw, pipeline); - merge_ctx_state(brw, &state); - - brw->vtbl.emit_state_base_address(brw); - - const struct brw_tracked_state *atoms = - brw_get_pipeline_atoms(brw, pipeline); - const int num_atoms = brw->num_atoms[pipeline]; - - if (INTEL_DEBUG(DEBUG_ANY)) { - /* Debug version which enforces various sanity checks on the - * state flags which are generated and checked to help ensure - * state atoms are ordered correctly in the list. - */ - struct brw_state_flags examined, prev; - memset(&examined, 0, sizeof(examined)); - prev = state; - - for (i = 0; i < num_atoms; i++) { - const struct brw_tracked_state *atom = &atoms[i]; - struct brw_state_flags generated; - - check_and_emit_atom(brw, &state, atom); - - accumulate_state(&examined, &atom->dirty); - - /* generated = (prev ^ state) - * if (examined & generated) - * fail; - */ - xor_states(&generated, &prev, &state); - assert(!check_state(&examined, &generated)); - prev = state; - } - } - else { - for (i = 0; i < num_atoms; i++) { - const struct brw_tracked_state *atom = &atoms[i]; - - check_and_emit_atom(brw, &state, atom); - } - } - - if (INTEL_DEBUG(DEBUG_STATE)) { - STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1); - - brw_update_dirty_count(mesa_bits, state.mesa); - brw_update_dirty_count(brw_bits, state.brw); - if (dirty_count++ % 1000 == 0) { - brw_print_dirty_count(mesa_bits); - brw_print_dirty_count(brw_bits); - fprintf(stderr, "\n"); - } - } -} - -/*********************************************************************** - * Emit all state: - */ -void brw_upload_render_state(struct brw_context *brw) -{ - brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE); -} - -static inline void -brw_pipeline_state_finished(struct brw_context *brw, - enum brw_pipeline pipeline) -{ - /* Save all dirty state into the other pipelines */ - for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) { - if (i != pipeline) { - brw->state.pipelines[i].mesa |= brw->NewGLState; - brw->state.pipelines[i].brw |= brw->ctx.NewDriverState; - } else { - memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags)); - } - } - - brw->NewGLState = 0; - brw->ctx.NewDriverState = 0ull; -} - -/** - * Clear dirty bits to account for the fact that the state emitted by - * brw_upload_render_state() has been committed to the hardware. This is a - * separate call from brw_upload_render_state() because it's possible that - * after the call to brw_upload_render_state(), we will discover that we've - * run out of aperture space, and need to rewind the batch buffer to the state - * it had before the brw_upload_render_state() call. - */ -void -brw_render_state_finished(struct brw_context *brw) -{ - brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE); -} - -void -brw_upload_compute_state(struct brw_context *brw) -{ - brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE); -} - -void -brw_compute_state_finished(struct brw_context *brw) -{ - brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE); -} diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h deleted file mode 100644 index c2d99be..0000000 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. 
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_STRUCTS_H -#define BRW_STRUCTS_H - -struct brw_urb_fence -{ - struct - { - unsigned length:8; - unsigned vs_realloc:1; - unsigned gs_realloc:1; - unsigned clp_realloc:1; - unsigned sf_realloc:1; - unsigned vfe_realloc:1; - unsigned cs_realloc:1; - unsigned pad:2; - unsigned opcode:16; - } header; - - struct - { - unsigned vs_fence:10; - unsigned gs_fence:10; - unsigned clp_fence:10; - unsigned pad:2; - } bits0; - - struct - { - unsigned sf_fence:10; - unsigned vf_fence:10; - unsigned cs_fence:11; - unsigned pad:1; - } bits1; -}; - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c deleted file mode 100644 index 235c15f..0000000 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ /dev/null @@ -1,558 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ -#include "main/mtypes.h" - -#include "isl/isl.h" - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" - -enum isl_format -brw_isl_format_for_mesa_format(mesa_format mesa_format) -{ - /* This table is ordered according to the enum ordering in formats.h. 
We do
-    * expect that enum to be extended without our explicit initialization
-    * staying in sync, so every entry is explicitly initialized to
-    * ISL_FORMAT_UNSUPPORTED rather than left zero-initialized, because 0
-    * happens to be a valid format (ISL_FORMAT_R32G32B32A32_FLOAT).
-    */
-   static const enum isl_format table[MESA_FORMAT_COUNT] = {
-      [0 ... MESA_FORMAT_COUNT-1] = ISL_FORMAT_UNSUPPORTED,
-
-      [MESA_FORMAT_R8G8B8A8_UNORM] = ISL_FORMAT_R8G8B8A8_UNORM,
-      [MESA_FORMAT_B8G8R8A8_UNORM] = ISL_FORMAT_B8G8R8A8_UNORM,
-      [MESA_FORMAT_R8G8B8X8_UNORM] = ISL_FORMAT_R8G8B8X8_UNORM,
-      [MESA_FORMAT_B8G8R8X8_UNORM] = ISL_FORMAT_B8G8R8X8_UNORM,
-      [MESA_FORMAT_RGB_UNORM8] = ISL_FORMAT_R8G8B8_UNORM,
-      [MESA_FORMAT_B5G6R5_UNORM] = ISL_FORMAT_B5G6R5_UNORM,
-      [MESA_FORMAT_B4G4R4A4_UNORM] = ISL_FORMAT_B4G4R4A4_UNORM,
-      [MESA_FORMAT_B5G5R5A1_UNORM] = ISL_FORMAT_B5G5R5A1_UNORM,
-      [MESA_FORMAT_LA_UNORM8] = ISL_FORMAT_L8A8_UNORM,
-      [MESA_FORMAT_LA_UNORM16] = ISL_FORMAT_L16A16_UNORM,
-      [MESA_FORMAT_A_UNORM8] = ISL_FORMAT_A8_UNORM,
-      [MESA_FORMAT_A_UNORM16] = ISL_FORMAT_A16_UNORM,
-      [MESA_FORMAT_L_UNORM8] = ISL_FORMAT_L8_UNORM,
-      [MESA_FORMAT_L_UNORM16] = ISL_FORMAT_L16_UNORM,
-      [MESA_FORMAT_I_UNORM8] = ISL_FORMAT_I8_UNORM,
-      [MESA_FORMAT_I_UNORM16] = ISL_FORMAT_I16_UNORM,
-      [MESA_FORMAT_YCBCR_REV] = ISL_FORMAT_YCRCB_NORMAL,
-      [MESA_FORMAT_YCBCR] = ISL_FORMAT_YCRCB_SWAPUVY,
-      [MESA_FORMAT_R_UNORM8] = ISL_FORMAT_R8_UNORM,
-      [MESA_FORMAT_RG_UNORM8] = ISL_FORMAT_R8G8_UNORM,
-      [MESA_FORMAT_R_UNORM16] = ISL_FORMAT_R16_UNORM,
-      [MESA_FORMAT_RG_UNORM16] = ISL_FORMAT_R16G16_UNORM,
-      [MESA_FORMAT_B10G10R10A2_UNORM] = ISL_FORMAT_B10G10R10A2_UNORM,
-      [MESA_FORMAT_S_UINT8] = ISL_FORMAT_R8_UINT,
-
-      [MESA_FORMAT_B8G8R8A8_SRGB] = ISL_FORMAT_B8G8R8A8_UNORM_SRGB,
-      [MESA_FORMAT_R8G8B8A8_SRGB] = ISL_FORMAT_R8G8B8A8_UNORM_SRGB,
-      [MESA_FORMAT_B8G8R8X8_SRGB] = ISL_FORMAT_B8G8R8X8_UNORM_SRGB,
-      [MESA_FORMAT_R_SRGB8] = ISL_FORMAT_L8_UNORM_SRGB,
-      [MESA_FORMAT_L_SRGB8] = ISL_FORMAT_L8_UNORM_SRGB,
-      [MESA_FORMAT_LA_SRGB8] = ISL_FORMAT_L8A8_UNORM_SRGB,
-      [MESA_FORMAT_SRGB_DXT1] = ISL_FORMAT_BC1_UNORM_SRGB,
-      [MESA_FORMAT_SRGBA_DXT1] = ISL_FORMAT_BC1_UNORM_SRGB,
-      [MESA_FORMAT_SRGBA_DXT3] = ISL_FORMAT_BC2_UNORM_SRGB,
-      [MESA_FORMAT_SRGBA_DXT5] = ISL_FORMAT_BC3_UNORM_SRGB,
-
-      [MESA_FORMAT_RGB_FXT1] = ISL_FORMAT_FXT1,
-      [MESA_FORMAT_RGBA_FXT1] = ISL_FORMAT_FXT1,
-      [MESA_FORMAT_RGB_DXT1] = ISL_FORMAT_BC1_UNORM,
-      [MESA_FORMAT_RGBA_DXT1] = ISL_FORMAT_BC1_UNORM,
-      [MESA_FORMAT_RGBA_DXT3] = ISL_FORMAT_BC2_UNORM,
-      [MESA_FORMAT_RGBA_DXT5] = ISL_FORMAT_BC3_UNORM,
-
-      [MESA_FORMAT_RGBA_FLOAT32] = ISL_FORMAT_R32G32B32A32_FLOAT,
-      [MESA_FORMAT_RGBA_FLOAT16] = ISL_FORMAT_R16G16B16A16_FLOAT,
-      [MESA_FORMAT_RGB_FLOAT32] = ISL_FORMAT_R32G32B32_FLOAT,
-      [MESA_FORMAT_A_FLOAT32] = ISL_FORMAT_A32_FLOAT,
-      [MESA_FORMAT_A_FLOAT16] = ISL_FORMAT_A16_FLOAT,
-      [MESA_FORMAT_L_FLOAT32] = ISL_FORMAT_L32_FLOAT,
-      [MESA_FORMAT_L_FLOAT16] = ISL_FORMAT_L16_FLOAT,
-      [MESA_FORMAT_LA_FLOAT32] = ISL_FORMAT_L32A32_FLOAT,
-      [MESA_FORMAT_LA_FLOAT16] = ISL_FORMAT_L16A16_FLOAT,
-      [MESA_FORMAT_I_FLOAT32] = ISL_FORMAT_I32_FLOAT,
-      [MESA_FORMAT_I_FLOAT16] = ISL_FORMAT_I16_FLOAT,
-      [MESA_FORMAT_R_FLOAT32] = ISL_FORMAT_R32_FLOAT,
-      [MESA_FORMAT_R_FLOAT16] = ISL_FORMAT_R16_FLOAT,
-      [MESA_FORMAT_RG_FLOAT32] = ISL_FORMAT_R32G32_FLOAT,
-      [MESA_FORMAT_RG_FLOAT16] = ISL_FORMAT_R16G16_FLOAT,
-
-      [MESA_FORMAT_R_SINT8] = ISL_FORMAT_R8_SINT,
-      [MESA_FORMAT_RG_SINT8] = ISL_FORMAT_R8G8_SINT,
-      [MESA_FORMAT_RGB_SINT8] = ISL_FORMAT_R8G8B8_SINT,
-      [MESA_FORMAT_RGBA_SINT8] = ISL_FORMAT_R8G8B8A8_SINT,
-      [MESA_FORMAT_R_SINT16] = ISL_FORMAT_R16_SINT,
-      [MESA_FORMAT_RG_SINT16] =
ISL_FORMAT_R16G16_SINT, - [MESA_FORMAT_RGB_SINT16] = ISL_FORMAT_R16G16B16_SINT, - [MESA_FORMAT_RGBA_SINT16] = ISL_FORMAT_R16G16B16A16_SINT, - [MESA_FORMAT_R_SINT32] = ISL_FORMAT_R32_SINT, - [MESA_FORMAT_RG_SINT32] = ISL_FORMAT_R32G32_SINT, - [MESA_FORMAT_RGB_SINT32] = ISL_FORMAT_R32G32B32_SINT, - [MESA_FORMAT_RGBA_SINT32] = ISL_FORMAT_R32G32B32A32_SINT, - - [MESA_FORMAT_R_UINT8] = ISL_FORMAT_R8_UINT, - [MESA_FORMAT_RG_UINT8] = ISL_FORMAT_R8G8_UINT, - [MESA_FORMAT_RGB_UINT8] = ISL_FORMAT_R8G8B8_UINT, - [MESA_FORMAT_RGBA_UINT8] = ISL_FORMAT_R8G8B8A8_UINT, - [MESA_FORMAT_R_UINT16] = ISL_FORMAT_R16_UINT, - [MESA_FORMAT_RG_UINT16] = ISL_FORMAT_R16G16_UINT, - [MESA_FORMAT_RGB_UINT16] = ISL_FORMAT_R16G16B16_UINT, - [MESA_FORMAT_RGBA_UINT16] = ISL_FORMAT_R16G16B16A16_UINT, - [MESA_FORMAT_R_UINT32] = ISL_FORMAT_R32_UINT, - [MESA_FORMAT_RG_UINT32] = ISL_FORMAT_R32G32_UINT, - [MESA_FORMAT_RGB_UINT32] = ISL_FORMAT_R32G32B32_UINT, - [MESA_FORMAT_RGBA_UINT32] = ISL_FORMAT_R32G32B32A32_UINT, - - [MESA_FORMAT_R_SNORM8] = ISL_FORMAT_R8_SNORM, - [MESA_FORMAT_RG_SNORM8] = ISL_FORMAT_R8G8_SNORM, - [MESA_FORMAT_R8G8B8A8_SNORM] = ISL_FORMAT_R8G8B8A8_SNORM, - [MESA_FORMAT_R_SNORM16] = ISL_FORMAT_R16_SNORM, - [MESA_FORMAT_RG_SNORM16] = ISL_FORMAT_R16G16_SNORM, - [MESA_FORMAT_RGB_SNORM16] = ISL_FORMAT_R16G16B16_SNORM, - [MESA_FORMAT_RGBA_SNORM16] = ISL_FORMAT_R16G16B16A16_SNORM, - [MESA_FORMAT_RGBA_UNORM16] = ISL_FORMAT_R16G16B16A16_UNORM, - - [MESA_FORMAT_R_RGTC1_UNORM] = ISL_FORMAT_BC4_UNORM, - [MESA_FORMAT_R_RGTC1_SNORM] = ISL_FORMAT_BC4_SNORM, - [MESA_FORMAT_RG_RGTC2_UNORM] = ISL_FORMAT_BC5_UNORM, - [MESA_FORMAT_RG_RGTC2_SNORM] = ISL_FORMAT_BC5_SNORM, - - [MESA_FORMAT_ETC1_RGB8] = ISL_FORMAT_ETC1_RGB8, - [MESA_FORMAT_ETC2_RGB8] = ISL_FORMAT_ETC2_RGB8, - [MESA_FORMAT_ETC2_SRGB8] = ISL_FORMAT_ETC2_SRGB8, - [MESA_FORMAT_ETC2_RGBA8_EAC] = ISL_FORMAT_ETC2_EAC_RGBA8, - [MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = ISL_FORMAT_ETC2_EAC_SRGB8_A8, - [MESA_FORMAT_ETC2_R11_EAC] = ISL_FORMAT_EAC_R11, - [MESA_FORMAT_ETC2_RG11_EAC] = ISL_FORMAT_EAC_RG11, - [MESA_FORMAT_ETC2_SIGNED_R11_EAC] = ISL_FORMAT_EAC_SIGNED_R11, - [MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = ISL_FORMAT_EAC_SIGNED_RG11, - [MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = ISL_FORMAT_ETC2_RGB8_PTA, - [MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = ISL_FORMAT_ETC2_SRGB8_PTA, - - [MESA_FORMAT_BPTC_RGBA_UNORM] = ISL_FORMAT_BC7_UNORM, - [MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM] = ISL_FORMAT_BC7_UNORM_SRGB, - [MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT] = ISL_FORMAT_BC6H_SF16, - [MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT] = ISL_FORMAT_BC6H_UF16, - - [MESA_FORMAT_RGBA_ASTC_4x4] = ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16, - [MESA_FORMAT_RGBA_ASTC_5x4] = ISL_FORMAT_ASTC_LDR_2D_5X4_FLT16, - [MESA_FORMAT_RGBA_ASTC_5x5] = ISL_FORMAT_ASTC_LDR_2D_5X5_FLT16, - [MESA_FORMAT_RGBA_ASTC_6x5] = ISL_FORMAT_ASTC_LDR_2D_6X5_FLT16, - [MESA_FORMAT_RGBA_ASTC_6x6] = ISL_FORMAT_ASTC_LDR_2D_6X6_FLT16, - [MESA_FORMAT_RGBA_ASTC_8x5] = ISL_FORMAT_ASTC_LDR_2D_8X5_FLT16, - [MESA_FORMAT_RGBA_ASTC_8x6] = ISL_FORMAT_ASTC_LDR_2D_8X6_FLT16, - [MESA_FORMAT_RGBA_ASTC_8x8] = ISL_FORMAT_ASTC_LDR_2D_8X8_FLT16, - [MESA_FORMAT_RGBA_ASTC_10x5] = ISL_FORMAT_ASTC_LDR_2D_10X5_FLT16, - [MESA_FORMAT_RGBA_ASTC_10x6] = ISL_FORMAT_ASTC_LDR_2D_10X6_FLT16, - [MESA_FORMAT_RGBA_ASTC_10x8] = ISL_FORMAT_ASTC_LDR_2D_10X8_FLT16, - [MESA_FORMAT_RGBA_ASTC_10x10] = ISL_FORMAT_ASTC_LDR_2D_10X10_FLT16, - [MESA_FORMAT_RGBA_ASTC_12x10] = ISL_FORMAT_ASTC_LDR_2D_12X10_FLT16, - [MESA_FORMAT_RGBA_ASTC_12x12] = ISL_FORMAT_ASTC_LDR_2D_12X12_FLT16, - 
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4] = ISL_FORMAT_ASTC_LDR_2D_4X4_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4] = ISL_FORMAT_ASTC_LDR_2D_5X4_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5] = ISL_FORMAT_ASTC_LDR_2D_5X5_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5] = ISL_FORMAT_ASTC_LDR_2D_6X5_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6] = ISL_FORMAT_ASTC_LDR_2D_6X6_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5] = ISL_FORMAT_ASTC_LDR_2D_8X5_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6] = ISL_FORMAT_ASTC_LDR_2D_8X6_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8] = ISL_FORMAT_ASTC_LDR_2D_8X8_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5] = ISL_FORMAT_ASTC_LDR_2D_10X5_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6] = ISL_FORMAT_ASTC_LDR_2D_10X6_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8] = ISL_FORMAT_ASTC_LDR_2D_10X8_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10] = ISL_FORMAT_ASTC_LDR_2D_10X10_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10] = ISL_FORMAT_ASTC_LDR_2D_12X10_U8SRGB,
-      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12] = ISL_FORMAT_ASTC_LDR_2D_12X12_U8SRGB,
-
-      [MESA_FORMAT_R9G9B9E5_FLOAT] = ISL_FORMAT_R9G9B9E5_SHAREDEXP,
-      [MESA_FORMAT_R11G11B10_FLOAT] = ISL_FORMAT_R11G11B10_FLOAT,
-
-      [MESA_FORMAT_R10G10B10A2_UNORM] = ISL_FORMAT_R10G10B10A2_UNORM,
-      [MESA_FORMAT_B10G10R10A2_UINT] = ISL_FORMAT_B10G10R10A2_UINT,
-      [MESA_FORMAT_R10G10B10A2_UINT] = ISL_FORMAT_R10G10B10A2_UINT,
-
-      [MESA_FORMAT_B5G5R5X1_UNORM] = ISL_FORMAT_B5G5R5X1_UNORM,
-      [MESA_FORMAT_R8G8B8X8_SRGB] = ISL_FORMAT_R8G8B8X8_UNORM_SRGB,
-      [MESA_FORMAT_B10G10R10X2_UNORM] = ISL_FORMAT_B10G10R10X2_UNORM,
-      [MESA_FORMAT_RGBX_UNORM16] = ISL_FORMAT_R16G16B16X16_UNORM,
-      [MESA_FORMAT_RGBX_FLOAT16] = ISL_FORMAT_R16G16B16X16_FLOAT,
-      [MESA_FORMAT_RGBX_FLOAT32] = ISL_FORMAT_R32G32B32X32_FLOAT,
-   };
-
-   assert(mesa_format < MESA_FORMAT_COUNT);
-   return table[mesa_format];
-}
-
-void
-brw_screen_init_surface_formats(struct brw_screen *screen)
-{
-   const struct intel_device_info *devinfo = &screen->devinfo;
-   mesa_format format;
-
-   memset(&screen->mesa_format_supports_texture, 0,
-          sizeof(screen->mesa_format_supports_texture));
-
-   for (format = MESA_FORMAT_NONE + 1; format < MESA_FORMAT_COUNT; format++) {
-      if (!_mesa_get_format_name(format))
-         continue;
-      enum isl_format texture, render;
-      bool is_integer = _mesa_is_format_integer_color(format);
-
-      render = texture = brw_isl_format_for_mesa_format(format);
-
-      /* Only exposed with EXT_memory_object_* support, which is not
-       * available on older generations.
-       */
-      if (devinfo->ver < 7 && format == MESA_FORMAT_Z_UNORM16)
-         continue;
-
-      if (texture == ISL_FORMAT_UNSUPPORTED)
-         continue;
-
-      /* Don't advertise 8 and 16-bit RGB formats to core mesa.  This ensures
-       * that they are renderable from an API perspective since core mesa
-       * will fall back to RGBA or RGBX (we can't render to non-power-of-two
-       * formats).  For 8-bit formats, this also keeps us from hitting some
-       * nasty corners in brw_miptree_map_blit if you ever try to map one.
-       */
-      int format_size = _mesa_get_format_bytes(format);
-      if (format_size == 3 || format_size == 6)
-         continue;
-
-      if (isl_format_supports_sampling(devinfo, texture) &&
-          (isl_format_supports_filtering(devinfo, texture) || is_integer))
-         screen->mesa_format_supports_texture[format] = true;
-
-      /* Re-map some render target formats to make them supported when they
-       * wouldn't be using their format for texturing.
-       */
-      switch (render) {
-      /* For these formats, we just need to read/write the first
-       * channel into R, which is to say that we just treat them as
-       * GL_RED.
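
Stepping back to the lookup table that opens this hunk: it relies on GCC's
range-designator extension so that any Mesa format without an explicit entry
reads back as unsupported. A self-contained sketch of the idiom (toy names,
compiles with GCC or Clang):

   #include <stdio.h>

   enum fmt { FMT_UNSUPPORTED = 0xffff, FMT_R8 = 1 };
   enum src { SRC_A, SRC_B, SRC_COUNT };

   static const enum fmt table[SRC_COUNT] = {
      [0 ... SRC_COUNT - 1] = FMT_UNSUPPORTED, /* GNU extension: fill all */
      [SRC_B] = FMT_R8,                        /* later designators win */
   };

   int main(void) {
      printf("%x %x\n", table[SRC_A], table[SRC_B]); /* prints: ffff 1 */
      return 0;
   }
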
- */ - case ISL_FORMAT_I32_FLOAT: - case ISL_FORMAT_L32_FLOAT: - render = ISL_FORMAT_R32_FLOAT; - break; - case ISL_FORMAT_I16_FLOAT: - case ISL_FORMAT_L16_FLOAT: - render = ISL_FORMAT_R16_FLOAT; - break; - case ISL_FORMAT_I8_UNORM: - case ISL_FORMAT_L8_UNORM: - render = ISL_FORMAT_R8_UNORM; - break; - case ISL_FORMAT_I16_UNORM: - case ISL_FORMAT_L16_UNORM: - render = ISL_FORMAT_R16_UNORM; - break; - case ISL_FORMAT_R16G16B16X16_UNORM: - render = ISL_FORMAT_R16G16B16A16_UNORM; - break; - case ISL_FORMAT_R16G16B16X16_FLOAT: - render = ISL_FORMAT_R16G16B16A16_FLOAT; - break; - case ISL_FORMAT_B8G8R8X8_UNORM: - /* XRGB is handled as ARGB because the chips in this family - * cannot render to XRGB targets. This means that we have to - * mask writes to alpha (ala glColorMask) and reconfigure the - * alpha blending hardware to use GL_ONE (or GL_ZERO) for - * cases where GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is - * used. On Gfx8+ BGRX is actually allowed (but not RGBX). - */ - if (!isl_format_supports_rendering(devinfo, texture)) - render = ISL_FORMAT_B8G8R8A8_UNORM; - break; - case ISL_FORMAT_B8G8R8X8_UNORM_SRGB: - if (!isl_format_supports_rendering(devinfo, texture)) - render = ISL_FORMAT_B8G8R8A8_UNORM_SRGB; - break; - case ISL_FORMAT_R8G8B8X8_UNORM: - render = ISL_FORMAT_R8G8B8A8_UNORM; - break; - case ISL_FORMAT_R8G8B8X8_UNORM_SRGB: - render = ISL_FORMAT_R8G8B8A8_UNORM_SRGB; - break; - default: - break; - } - - /* Note that GL_EXT_texture_integer says that blending doesn't occur for - * integer, so we don't need hardware support for blending on it. Other - * than that, GL in general requires alpha blending for render targets, - * even though we don't support it for some formats. - */ - if (isl_format_supports_rendering(devinfo, render) && - (isl_format_supports_alpha_blending(devinfo, render) || is_integer)) { - screen->mesa_to_isl_render_format[format] = render; - screen->mesa_format_supports_render[format] = true; - } - } - - /* We will check this table for FBO completeness, but the surface format - * table above only covered color rendering. - */ - screen->mesa_format_supports_render[MESA_FORMAT_Z24_UNORM_S8_UINT] = true; - screen->mesa_format_supports_render[MESA_FORMAT_Z24_UNORM_X8_UINT] = true; - screen->mesa_format_supports_render[MESA_FORMAT_S_UINT8] = true; - screen->mesa_format_supports_render[MESA_FORMAT_Z_FLOAT32] = true; - screen->mesa_format_supports_render[MESA_FORMAT_Z32_FLOAT_S8X24_UINT] = true; - if (devinfo->ver >= 8) - screen->mesa_format_supports_render[MESA_FORMAT_Z_UNORM16] = true; - - /* We remap depth formats to a supported texturing format in - * translate_tex_format(). - */ - screen->mesa_format_supports_texture[MESA_FORMAT_Z24_UNORM_S8_UINT] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_Z24_UNORM_X8_UINT] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_Z_FLOAT32] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_Z32_FLOAT_S8X24_UINT] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_S_UINT8] = true; - - /* Benchmarking shows that Z16 is slower than Z24, so there's no reason to - * use it unless you're under memory (not memory bandwidth) pressure. - * - * Apparently, the GPU's depth scoreboarding works on a 32-bit granularity, - * which corresponds to one pixel in the depth buffer for Z24 or Z32 formats. - * However, it corresponds to two pixels with Z16, which means both need to - * hit the early depth case in order for it to happen. 
- *
-    * Other speculation is that we may be hitting increased fragment shader
-    * execution from GL_LEQUAL/GL_EQUAL depth tests at reduced precision.
-    *
-    * With the PMA stall workaround in place, Z16 is faster than Z24, as it
-    * should be.
-    */
-   if (devinfo->ver >= 8)
-      screen->mesa_format_supports_texture[MESA_FORMAT_Z_UNORM16] = true;
-
-   /* The RGBX formats are not renderable.  Normally these get mapped
-    * internally to RGBA formats when rendering.  However on Gfx9+ when this
-    * internal override is used fast clears don't work so they are disabled
-    * in brw_meta_fast_clear.  To avoid this problem we can just pretend not
-    * to support RGBX formats at all.  This will cause the upper layers of
-    * Mesa to pick the RGBA formats instead.  This works fine because when it
-    * is used as a texture source the swizzle state is programmed to force
-    * the alpha channel to 1.0 anyway.  We could also do this for all gens
-    * except that it's a bit more difficult when the hardware doesn't support
-    * texture swizzling.  Generations that use blorp have further problems
-    * because blorp doesn't implement this swizzle override.  We don't need
-    * to do this for BGRX because that actually is supported natively on
-    * Gfx8+.
-    */
-   if (devinfo->ver >= 9) {
-      static const mesa_format rgbx_formats[] = {
-         MESA_FORMAT_R8G8B8X8_UNORM,
-         MESA_FORMAT_R8G8B8X8_SRGB,
-         MESA_FORMAT_RGBX_UNORM16,
-         MESA_FORMAT_RGBX_FLOAT16,
-         MESA_FORMAT_RGBX_FLOAT32
-      };
-
-      for (int i = 0; i < ARRAY_SIZE(rgbx_formats); i++) {
-         screen->mesa_format_supports_texture[rgbx_formats[i]] = false;
-         screen->mesa_format_supports_render[rgbx_formats[i]] = false;
-      }
-   }
-
-   /* On hardware that lacks support for ETC1, we map ETC1 to RGBX
-    * during glCompressedTexImage2D().  See brw_mipmap_tree::wraps_etc1.
-    */
-   screen->mesa_format_supports_texture[MESA_FORMAT_ETC1_RGB8] = true;
-
-   /* On hardware that lacks support for ETC2, we map ETC2 to a suitable
-    * MESA_FORMAT during glCompressedTexImage2D().
-    * See brw_mipmap_tree::wraps_etc2.
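
The shape of that ETC fallback, sketched with illustrative field names (the
miptree's real layout differs):

   struct etc_shadow {
      bool wraps_etc;            /* set when the HW can't sample ETC */
      mesa_format api_format;    /* what the app uploaded (ETC1/ETC2) */
      mesa_format shadow_format; /* uncompressed format we allocate and
                                  * actually sample from */
   };

The ETC blocks are decoded into shadow_format texels at upload time, while
api_format is remembered so API queries still report the compressed format
the application asked for.
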
- */ - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RGB8] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SRGB8] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RGBA8_EAC] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_R11_EAC] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RG11_EAC] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SIGNED_R11_EAC] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = true; - screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = true; -} - -void -brw_init_surface_formats(struct brw_context *brw) -{ - struct brw_screen *screen = brw->screen; - struct gl_context *ctx = &brw->ctx; - - brw->mesa_format_supports_render = screen->mesa_format_supports_render; - brw->mesa_to_isl_render_format = screen->mesa_to_isl_render_format; - - STATIC_ASSERT(ARRAY_SIZE(ctx->TextureFormatSupported) == - ARRAY_SIZE(screen->mesa_format_supports_texture)); - - for (unsigned i = 0; i < ARRAY_SIZE(ctx->TextureFormatSupported); ++i) { - ctx->TextureFormatSupported[i] = screen->mesa_format_supports_texture[i]; - } -} - -bool -brw_render_target_supported(struct brw_context *brw, - struct gl_renderbuffer *rb) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - mesa_format format = rb->Format; - - /* Many integer formats are promoted to RGBA (like XRGB8888 is), which means - * we would consider them renderable even though we don't have surface - * support for their alpha behavior and don't have the blending unit - * available to fake it like we do for XRGB8888. Force them to being - * unsupported. - */ - if (_mesa_is_format_integer_color(format) && - rb->_BaseFormat != GL_RGBA && - rb->_BaseFormat != GL_RG && - rb->_BaseFormat != GL_RED) - return false; - - /* Under some conditions, MSAA is not supported for formats whose width is - * more than 64 bits. - */ - if (devinfo->ver < 8 && - rb->NumSamples > 0 && _mesa_get_format_bytes(format) > 8) { - /* Gfx6: MSAA on >64 bit formats is unsupported. */ - if (devinfo->ver <= 6) - return false; - - /* Gfx7: 8x MSAA on >64 bit formats is unsupported. */ - if (rb->NumSamples >= 8) - return false; - } - - return brw->mesa_format_supports_render[format]; -} - -enum isl_format -translate_tex_format(struct brw_context *brw, - mesa_format mesa_format, - GLenum srgb_decode) -{ - struct gl_context *ctx = &brw->ctx; - if (srgb_decode == GL_SKIP_DECODE_EXT) - mesa_format = _mesa_get_srgb_format_linear(mesa_format); - - switch( mesa_format ) { - - case MESA_FORMAT_Z_UNORM16: - return ISL_FORMAT_R16_UNORM; - - case MESA_FORMAT_Z24_UNORM_S8_UINT: - case MESA_FORMAT_Z24_UNORM_X8_UINT: - return ISL_FORMAT_R24_UNORM_X8_TYPELESS; - - case MESA_FORMAT_Z_FLOAT32: - return ISL_FORMAT_R32_FLOAT; - - case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: - return ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS; - - case MESA_FORMAT_RGBA_FLOAT32: - /* The value of this ISL surface format is 0, which tricks the - * assertion below. 
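
One more note before the ASTC cases below: full-profile (HDR-capable) decode
is selected by ORing a single bit into the LDR surface-format value, roughly:

   enum isl_format fmt = brw_isl_format_for_mesa_format(mesa_format);

   /* sRGB ASTC is incompatible with full-profile decode, so only the
    * non-sRGB (FLT16) variants ever reach this OR. */
   if (ctx->Extensions.KHR_texture_compression_astc_hdr)
      fmt |= GFX9_SURFACE_ASTC_HDR_FORMAT_BIT;
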
- */
-      return ISL_FORMAT_R32G32B32A32_FLOAT;
-
-   case MESA_FORMAT_RGBA_ASTC_4x4:
-   case MESA_FORMAT_RGBA_ASTC_5x4:
-   case MESA_FORMAT_RGBA_ASTC_5x5:
-   case MESA_FORMAT_RGBA_ASTC_6x5:
-   case MESA_FORMAT_RGBA_ASTC_6x6:
-   case MESA_FORMAT_RGBA_ASTC_8x5:
-   case MESA_FORMAT_RGBA_ASTC_8x6:
-   case MESA_FORMAT_RGBA_ASTC_8x8:
-   case MESA_FORMAT_RGBA_ASTC_10x5:
-   case MESA_FORMAT_RGBA_ASTC_10x6:
-   case MESA_FORMAT_RGBA_ASTC_10x8:
-   case MESA_FORMAT_RGBA_ASTC_10x10:
-   case MESA_FORMAT_RGBA_ASTC_12x10:
-   case MESA_FORMAT_RGBA_ASTC_12x12: {
-      enum isl_format isl_fmt =
-         brw_isl_format_for_mesa_format(mesa_format);
-
-      /**
-       * It is possible to process these formats using the LDR Profile
-       * or the Full Profile mode of the hardware.  Because it isn't
-       * possible to determine if an HDR or LDR texture is being rendered,
-       * we can't determine which mode to enable in the hardware.
-       * Therefore, to handle all cases, always default to Full profile
-       * unless we are processing sRGBs, which are incompatible with this
-       * mode.
-       */
-      if (ctx->Extensions.KHR_texture_compression_astc_hdr)
-         isl_fmt |= GFX9_SURFACE_ASTC_HDR_FORMAT_BIT;
-
-      return isl_fmt;
-   }
-
-   default:
-      return brw_isl_format_for_mesa_format(mesa_format);
-   }
-}
-
-/**
- * Convert a MESA_FORMAT to the corresponding BRW_DEPTHFORMAT enum.
- */
-uint32_t
-brw_depth_format(struct brw_context *brw, mesa_format format)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   switch (format) {
-   case MESA_FORMAT_Z_UNORM16:
-      return BRW_DEPTHFORMAT_D16_UNORM;
-   case MESA_FORMAT_Z_FLOAT32:
-      return BRW_DEPTHFORMAT_D32_FLOAT;
-   case MESA_FORMAT_Z24_UNORM_X8_UINT:
-      if (devinfo->ver >= 6) {
-         return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
-      } else {
-         /* Use D24_UNORM_S8, not D24_UNORM_X8.
-          *
-          * D24_UNORM_X8 was not introduced until Gfx5.  (See the Ironlake
-          * PRM, Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer
-          * State", Bits 3DSTATE_DEPTH_BUFFER.Surface_Format).
-          *
-          * However, on Gfx5, D24_UNORM_X8 may be used only if separate
-          * stencil is enabled, and we never enable it.  From the Ironlake
-          * PRM, same section as above, 3DSTATE_DEPTH_BUFFER's
-          * "Separate Stencil Buffer Enable" bit:
-          *
-          *    "If this field is disabled, the Surface Format of the depth
-          *     buffer cannot be D24_UNORM_X8_UINT."
-          */
-         return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
-      }
-   case MESA_FORMAT_Z24_UNORM_S8_UINT:
-      return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
-   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
-      return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
-   default:
-      unreachable("Unexpected depth format.");
-   }
-}
diff --git a/src/mesa/drivers/dri/i965/brw_sync.c b/src/mesa/drivers/dri/i965/brw_sync.c
deleted file mode 100644
index 97cb9e7..0000000
--- a/src/mesa/drivers/dri/i965/brw_sync.c
+++ /dev/null
@@ -1,642 +0,0 @@
-/*
- * Copyright © 2008 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *
- */
-
-/**
- * \file
- * \brief Support for GL_ARB_sync and EGL_KHR_fence_sync.
- *
- * GL_ARB_sync is implemented by flushing the current batchbuffer and keeping
- * a reference on it.  We can then check for completion or wait for completion
- * using the normal buffer object mechanisms.  This does mean that if an
- * application is using many sync objects, it will emit small batchbuffers
- * which may end up being a significant overhead.  In other tests of removing
- * gratuitous batchbuffer syncs in Mesa, it hasn't appeared to be a
- * significant performance bottleneck, though.
- */
-
-#include <libsync.h> /* Requires Android or libdrm-2.4.72 */
-
-#include "util/os_file.h"
-#include "util/u_memory.h"
-#include <xf86drm.h>
-
-#include "brw_context.h"
-#include "brw_batch.h"
-#include "mesa/main/externalobjects.h"
-
-struct brw_fence {
-   struct brw_context *brw;
-
-   enum brw_fence_type {
-      /** The fence waits for completion of brw_fence::batch_bo. */
-      BRW_FENCE_TYPE_BO_WAIT,
-
-      /** The fence waits for brw_fence::sync_fd to signal. */
-      BRW_FENCE_TYPE_SYNC_FD,
-   } type;
-
-   union {
-      struct brw_bo *batch_bo;
-
-      /* This struct owns the fd. */
-      int sync_fd;
-   };
-
-   mtx_t mutex;
-   bool signalled;
-};
-
-struct brw_gl_sync {
-   struct gl_sync_object gl;
-   struct brw_fence fence;
-};
-
-struct intel_semaphore_object {
-   struct gl_semaphore_object Base;
-   struct drm_syncobj_handle *syncobj;
-};
-
-static inline struct intel_semaphore_object *
-intel_semaphore_object(struct gl_semaphore_object *sem_obj) {
-   return (struct intel_semaphore_object*) sem_obj;
-}
-
-static struct gl_semaphore_object *
-intel_semaphoreobj_alloc(struct gl_context *ctx, GLuint name)
-{
-   struct intel_semaphore_object *is_obj = CALLOC_STRUCT(intel_semaphore_object);
-   if (!is_obj)
-      return NULL;
-
-   _mesa_initialize_semaphore_object(ctx, &is_obj->Base, name);
-   return &is_obj->Base;
-}
-
-static void
-intel_semaphoreobj_free(struct gl_context *ctx,
-                        struct gl_semaphore_object *semObj)
-{
-   _mesa_delete_semaphore_object(ctx, semObj);
-}
-
-static void
-intel_semaphoreobj_import(struct gl_context *ctx,
-                          struct gl_semaphore_object *semObj,
-                          int fd)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_screen *screen = brw->screen;
-   struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
-   iSemObj->syncobj = CALLOC_STRUCT(drm_syncobj_handle);
-   iSemObj->syncobj->fd = fd;
-
-   if (drmIoctl(screen->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, iSemObj->syncobj) < 0) {
-      fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE failed: %s\n",
-              strerror(errno));
-      free(iSemObj->syncobj);
-   }
-}
-
-static void
-intel_semaphoreobj_signal(struct gl_context *ctx,
-                          struct gl_semaphore_object *semObj,
-                          GLuint numBufferBarriers,
-                          struct gl_buffer_object **bufObjs,
-                          GLuint numTextureBarriers,
-                          struct gl_texture_object **texObjs,
-                          const GLenum *dstLayouts)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
-   struct drm_i915_gem_exec_fence *fence =
-      util_dynarray_grow(&brw->batch.exec_fences,
-                         struct drm_i915_gem_exec_fence, 1);
-   fence->flags = I915_EXEC_FENCE_SIGNAL;
-   fence->handle = iSemObj->syncobj->handle;
-   brw->batch.contains_fence_signal = true;
-}
-
-static void
-intel_semaphoreobj_wait(struct gl_context *ctx,
-                        struct gl_semaphore_object *semObj,
-                        GLuint numBufferBarriers,
-                        struct gl_buffer_object **bufObjs,
-                        GLuint numTextureBarriers,
-                        struct gl_texture_object **texObjs,
-                        const GLenum *srcLayouts)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_screen *screen = brw->screen;
-   struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
-   struct drm_syncobj_wait args = {
-      .handles = (uintptr_t)&iSemObj->syncobj->handle,
-      .count_handles = 1,
-   };
-
-   drmIoctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
-}
-
-static void
-brw_fence_init(struct brw_context *brw, struct brw_fence *fence,
-               enum brw_fence_type type)
-{
-   fence->brw = brw;
-   fence->type = type;
-   mtx_init(&fence->mutex, mtx_plain);
-
-   switch (type) {
-   case BRW_FENCE_TYPE_BO_WAIT:
-      fence->batch_bo = NULL;
-      break;
-   case BRW_FENCE_TYPE_SYNC_FD:
-      fence->sync_fd = -1;
-      break;
-   }
-}
-
-static void
-brw_fence_finish(struct brw_fence *fence)
-{
-   switch (fence->type) {
-   case BRW_FENCE_TYPE_BO_WAIT:
-      if (fence->batch_bo)
-         brw_bo_unreference(fence->batch_bo);
-      break;
-   case BRW_FENCE_TYPE_SYNC_FD:
-      if (fence->sync_fd != -1)
-         close(fence->sync_fd);
-      break;
-   }
-
-   mtx_destroy(&fence->mutex);
-}
-
-static bool MUST_CHECK
-brw_fence_insert_locked(struct brw_context *brw, struct brw_fence *fence)
-{
-   __DRIcontext *driContext = brw->driContext;
-   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
-
-   /*
-    * From KHR_fence_sync:
-    *
-    *   When the condition of the sync object is satisfied by the fence
-    *   command, the sync is signaled by the associated client API context,
-    *   causing any eglClientWaitSyncKHR commands (see below) blocking on
-    *   <sync> to unblock. The only condition currently supported is
-    *   EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR, which is satisfied by
-    *   completion of the fence command corresponding to the sync object,
-    *   and all preceding commands in the associated client API context's
-    *   command stream. The sync object will not be signaled until all
-    *   effects from these commands on the client API's internal and
-    *   framebuffer state are fully realized. No other state is affected by
-    *   execution of the fence command.
-    *
-    * Note the emphasis there on ensuring that the framebuffer is fully
-    * realized before the fence is signaled.  We cannot just flush the batch,
-    * but must also resolve the drawable first.  The importance of this is,
-    * for example, in creating a fence for a frame to be passed to a
-    * remote compositor.  Without us flushing the drawable explicitly, the
-    * resolve will be in a following batch (when the client finally calls
-    * SwapBuffers, or triggers a resolve via some other path) and so the
-    * compositor may read the incomplete framebuffer instead.
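
For context, this is the client-side API the path ultimately backs (plain
GL_ARB_sync usage, not driver code):

   GLsync s = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
   glFlush(); /* make sure the fence command reaches the GPU */

   switch (glClientWaitSync(s, 0, 16000000 /* 16 ms, in ns */)) {
   case GL_ALREADY_SIGNALED:
   case GL_CONDITION_SATISFIED:
      /* all GL work prior to the fence has completed */
      break;
   case GL_TIMEOUT_EXPIRED:
      /* still busy; poll again or do other work */
      break;
   default: /* GL_WAIT_FAILED */
      break;
   }
   glDeleteSync(s);
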
- */ - if (driDrawable) - brw_resolve_for_dri2_flush(brw, driDrawable); - brw_emit_mi_flush(brw); - - switch (fence->type) { - case BRW_FENCE_TYPE_BO_WAIT: - assert(!fence->batch_bo); - assert(!fence->signalled); - - fence->batch_bo = brw->batch.batch.bo; - brw_bo_reference(fence->batch_bo); - - if (brw_batch_flush(brw) < 0) { - brw_bo_unreference(fence->batch_bo); - fence->batch_bo = NULL; - return false; - } - break; - case BRW_FENCE_TYPE_SYNC_FD: - assert(!fence->signalled); - - if (fence->sync_fd == -1) { - /* Create an out-fence that signals after all pending commands - * complete. - */ - if (brw_batch_flush_fence(brw, -1, &fence->sync_fd) < 0) - return false; - assert(fence->sync_fd != -1); - } else { - /* Wait on the in-fence before executing any subsequently submitted - * commands. - */ - if (brw_batch_flush(brw) < 0) - return false; - - /* Emit a dummy batch just for the fence. */ - brw_emit_mi_flush(brw); - if (brw_batch_flush_fence(brw, fence->sync_fd, NULL) < 0) - return false; - } - break; - } - - return true; -} - -static bool MUST_CHECK -brw_fence_insert(struct brw_context *brw, struct brw_fence *fence) -{ - bool ret; - - mtx_lock(&fence->mutex); - ret = brw_fence_insert_locked(brw, fence); - mtx_unlock(&fence->mutex); - - return ret; -} - -static bool -brw_fence_has_completed_locked(struct brw_fence *fence) -{ - if (fence->signalled) - return true; - - switch (fence->type) { - case BRW_FENCE_TYPE_BO_WAIT: - if (!fence->batch_bo) { - /* There may be no batch if brw_batch_flush() failed. */ - return false; - } - - if (brw_bo_busy(fence->batch_bo)) - return false; - - brw_bo_unreference(fence->batch_bo); - fence->batch_bo = NULL; - fence->signalled = true; - - return true; - - case BRW_FENCE_TYPE_SYNC_FD: - assert(fence->sync_fd != -1); - - if (sync_wait(fence->sync_fd, 0) == -1) - return false; - - fence->signalled = true; - - return true; - } - - return false; -} - -static bool -brw_fence_has_completed(struct brw_fence *fence) -{ - bool ret; - - mtx_lock(&fence->mutex); - ret = brw_fence_has_completed_locked(fence); - mtx_unlock(&fence->mutex); - - return ret; -} - -static bool -brw_fence_client_wait_locked(struct brw_context *brw, struct brw_fence *fence, - uint64_t timeout) -{ - int32_t timeout_i32; - - if (fence->signalled) - return true; - - switch (fence->type) { - case BRW_FENCE_TYPE_BO_WAIT: - if (!fence->batch_bo) { - /* There may be no batch if brw_batch_flush() failed. */ - return false; - } - - /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns - * immediately for timeouts <= 0. The best we can do is to clamp the - * timeout to INT64_MAX. This limits the maximum timeout from 584 years to - * 292 years - likely not a big deal. - */ - if (timeout > INT64_MAX) - timeout = INT64_MAX; - - if (brw_bo_wait(fence->batch_bo, timeout) != 0) - return false; - - fence->signalled = true; - brw_bo_unreference(fence->batch_bo); - fence->batch_bo = NULL; - - return true; - case BRW_FENCE_TYPE_SYNC_FD: - if (fence->sync_fd == -1) - return false; - - if (timeout > INT32_MAX) - timeout_i32 = -1; - else - timeout_i32 = timeout; - - if (sync_wait(fence->sync_fd, timeout_i32) == -1) - return false; - - fence->signalled = true; - return true; - } - - assert(!"bad enum brw_fence_type"); - return false; -} - -/** - * Return true if the function successfully signals or has already signalled. - * (This matches the behavior expected from __DRI2fence::client_wait_sync). 
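
A note on the timeout handling above: three integer domains meet in
brw_fence_client_wait_locked(), and each kernel interface clamps differently.
Condensed:

   uint64_t timeout; /* API side: unsigned 64-bit nanoseconds */

   /* DRM_IOCTL_I915_GEM_WAIT takes a signed 64-bit count and returns
    * immediately for values <= 0, so clamp rather than wrap. */
   int64_t bo_timeout = timeout > INT64_MAX ? INT64_MAX : (int64_t)timeout;

   /* sync_wait() takes a signed 32-bit count where -1 means "forever". */
   int32_t fd_timeout = timeout > INT32_MAX ? -1 : (int32_t)timeout;
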
- */ -static bool -brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence, - uint64_t timeout) -{ - bool ret; - - mtx_lock(&fence->mutex); - ret = brw_fence_client_wait_locked(brw, fence, timeout); - mtx_unlock(&fence->mutex); - - return ret; -} - -static void -brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence) -{ - switch (fence->type) { - case BRW_FENCE_TYPE_BO_WAIT: - /* We have nothing to do for WaitSync. Our GL command stream is sequential, - * so given that the sync object has already flushed the batchbuffer, any - * batchbuffers coming after this waitsync will naturally not occur until - * the previous one is done. - */ - break; - case BRW_FENCE_TYPE_SYNC_FD: - assert(fence->sync_fd != -1); - - /* The user wants explicit synchronization, so give them what they want. */ - if (!brw_fence_insert(brw, fence)) { - /* FIXME: There exists no way yet to report an error here. If an error - * occurs, continue silently and hope for the best. - */ - } - break; - } -} - -static struct gl_sync_object * -brw_gl_new_sync(struct gl_context *ctx) -{ - struct brw_gl_sync *sync; - - sync = calloc(1, sizeof(*sync)); - if (!sync) - return NULL; - - return &sync->gl; -} - -static void -brw_gl_delete_sync(struct gl_context *ctx, struct gl_sync_object *_sync) -{ - struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync; - - brw_fence_finish(&sync->fence); - free(sync->gl.Label); - free(sync); -} - -static void -brw_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *_sync, - GLenum condition, GLbitfield flags) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync; - - /* brw_fence_insert_locked() assumes it must do a complete flush */ - assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE); - - brw_fence_init(brw, &sync->fence, BRW_FENCE_TYPE_BO_WAIT); - - if (!brw_fence_insert_locked(brw, &sync->fence)) { - /* FIXME: There exists no way to report a GL error here. If an error - * occurs, continue silently and hope for the best. 
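
These FIXMEs exist because the hooks run inside core Mesa's dispatch with no
error-reporting channel: the driver registers them through the device-driver
function table, as brw_init_syncobj_functions() below shows. Trimmed to its
shape (comments added for illustration):

   /* Core Mesa calls through per-context driver hooks; unset hooks fall
    * back to core defaults. */
   functions->NewSyncObject  = brw_gl_new_sync;        /* glFenceSync alloc */
   functions->FenceSync      = brw_gl_fence_sync;      /* glFenceSync */
   functions->ClientWaitSync = brw_gl_client_wait_sync; /* glClientWaitSync */
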
- */
-   }
-}
-
-static void
-brw_gl_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
-                        GLbitfield flags, GLuint64 timeout)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
-
-   if (brw_fence_client_wait(brw, &sync->fence, timeout))
-      sync->gl.StatusFlag = 1;
-}
-
-static void
-brw_gl_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
-                        GLbitfield flags, GLuint64 timeout)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
-
-   brw_fence_server_wait(brw, &sync->fence);
-}
-
-static void
-brw_gl_check_sync(struct gl_context *ctx, struct gl_sync_object *_sync)
-{
-   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
-
-   if (brw_fence_has_completed(&sync->fence))
-      sync->gl.StatusFlag = 1;
-}
-
-void
-brw_init_syncobj_functions(struct dd_function_table *functions)
-{
-   functions->NewSyncObject = brw_gl_new_sync;
-   functions->DeleteSyncObject = brw_gl_delete_sync;
-   functions->FenceSync = brw_gl_fence_sync;
-   functions->CheckSync = brw_gl_check_sync;
-   functions->ClientWaitSync = brw_gl_client_wait_sync;
-   functions->ServerWaitSync = brw_gl_server_wait_sync;
-   functions->NewSemaphoreObject = intel_semaphoreobj_alloc;
-   functions->DeleteSemaphoreObject = intel_semaphoreobj_free;
-   functions->ImportSemaphoreFd = intel_semaphoreobj_import;
-   functions->ServerSignalSemaphoreObject = intel_semaphoreobj_signal;
-   functions->ServerWaitSemaphoreObject = intel_semaphoreobj_wait;
-}
-
-static void *
-brw_dri_create_fence(__DRIcontext *ctx)
-{
-   struct brw_context *brw = ctx->driverPrivate;
-   struct brw_fence *fence;
-
-   fence = calloc(1, sizeof(*fence));
-   if (!fence)
-      return NULL;
-
-   brw_fence_init(brw, fence, BRW_FENCE_TYPE_BO_WAIT);
-
-   if (!brw_fence_insert_locked(brw, fence)) {
-      brw_fence_finish(fence);
-      free(fence);
-      return NULL;
-   }
-
-   return fence;
-}
-
-static void
-brw_dri_destroy_fence(__DRIscreen *dri_screen, void *_fence)
-{
-   struct brw_fence *fence = _fence;
-
-   brw_fence_finish(fence);
-   free(fence);
-}
-
-static GLboolean
-brw_dri_client_wait_sync(__DRIcontext *ctx, void *_fence, unsigned flags,
-                         uint64_t timeout)
-{
-   struct brw_fence *fence = _fence;
-
-   return brw_fence_client_wait(fence->brw, fence, timeout);
-}
-
-static void
-brw_dri_server_wait_sync(__DRIcontext *ctx, void *_fence, unsigned flags)
-{
-   struct brw_fence *fence = _fence;
-
-   /* We might be called here with a NULL fence as a result of WaitSyncKHR
-    * on an EGL_KHR_reusable_sync fence.  Nothing to do here in that case.
-    */
-   if (!fence)
-      return;
-
-   brw_fence_server_wait(fence->brw, fence);
-}
-
-static unsigned
-brw_dri_get_capabilities(__DRIscreen *dri_screen)
-{
-   struct brw_screen *screen = dri_screen->driverPrivate;
-   unsigned caps = 0;
-
-   if (screen->has_exec_fence)
-      caps |= __DRI_FENCE_CAP_NATIVE_FD;
-
-   return caps;
-}
-
-static void *
-brw_dri_create_fence_fd(__DRIcontext *dri_ctx, int fd)
-{
-   struct brw_context *brw = dri_ctx->driverPrivate;
-   struct brw_fence *fence;
-
-   assert(brw->screen->has_exec_fence);
-
-   fence = calloc(1, sizeof(*fence));
-   if (!fence)
-      return NULL;
-
-   brw_fence_init(brw, fence, BRW_FENCE_TYPE_SYNC_FD);
-
-   if (fd == -1) {
-      /* Create an out-fence fd */
-      if (!brw_fence_insert_locked(brw, fence))
-         goto fail;
-   } else {
-      /* Import the sync fd as an in-fence.
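
The sync-fd half of this file backs EGL_ANDROID_native_fence_sync; the
client-side counterpart looks like this (standard EGL, error handling
omitted):

   EGLSyncKHR sync =
      eglCreateSyncKHR(dpy, EGL_SYNC_NATIVE_FENCE_ANDROID, NULL);
   glFlush(); /* submit the work the fence should cover */

   /* Export an out-fence fd; every consumer dups its own copy, which is
    * why the import path here calls os_dupfd_cloexec(). */
   int fd = eglDupNativeFenceFDANDROID(dpy, sync);
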
*/ - fence->sync_fd = os_dupfd_cloexec(fd); - } - - assert(fence->sync_fd != -1); - - return fence; - -fail: - brw_fence_finish(fence); - free(fence); - return NULL; -} - -static int -brw_dri_get_fence_fd_locked(struct brw_fence *fence) -{ - assert(fence->type == BRW_FENCE_TYPE_SYNC_FD); - return os_dupfd_cloexec(fence->sync_fd); -} - -static int -brw_dri_get_fence_fd(__DRIscreen *dri_screen, void *_fence) -{ - struct brw_fence *fence = _fence; - int fd; - - mtx_lock(&fence->mutex); - fd = brw_dri_get_fence_fd_locked(fence); - mtx_unlock(&fence->mutex); - - return fd; -} - -const __DRI2fenceExtension brwFenceExtension = { - .base = { __DRI2_FENCE, 2 }, - - .create_fence = brw_dri_create_fence, - .destroy_fence = brw_dri_destroy_fence, - .client_wait_sync = brw_dri_client_wait_sync, - .server_wait_sync = brw_dri_server_wait_sync, - .get_fence_from_cl_event = NULL, - .get_capabilities = brw_dri_get_capabilities, - .create_fence_fd = brw_dri_create_fence_fd, - .get_fence_fd = brw_dri_get_fence_fd, -}; diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c deleted file mode 100644 index 5eaa3ed..0000000 --- a/src/mesa/drivers/dri/i965/brw_tcs.c +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file brw_tcs.c - * - * Tessellation control shader state upload code. 
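
A road map for the file below: every shader stage uploads through the same
three-tier lookup before compiling anything. Condensed from
brw_upload_tcs_prog() further down:

   brw_tcs_populate_key(brw, &key);

   /* 1. in-memory program cache */
   if (brw_search_cache(&brw->cache, BRW_CACHE_TCS_PROG,
                        &key, sizeof(key), &stage_state->prog_offset,
                        &brw->tcs.base.prog_data, true))
      return;

   /* 2. on-disk shader cache */
   if (brw_disk_cache_upload_program(brw, MESA_SHADER_TESS_CTRL))
      return;

   /* 3. compile from NIR and add the result to the cache */
   brw_codegen_tcs_prog(brw, tcp, tep, &key);
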
- */ - -#include "brw_context.h" -#include "compiler/brw_nir.h" -#include "brw_program.h" -#include "brw_state.h" -#include "program/prog_parameter.h" -#include "nir_builder.h" - -static bool -brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp, - struct brw_program *tep, struct brw_tcs_prog_key *key) -{ - struct gl_context *ctx = &brw->ctx; - const struct brw_compiler *compiler = brw->screen->compiler; - const struct intel_device_info *devinfo = compiler->devinfo; - struct brw_stage_state *stage_state = &brw->tcs.base; - nir_shader *nir; - struct brw_tcs_prog_data prog_data; - bool start_busy = false; - double start_time = 0; - - void *mem_ctx = ralloc_context(NULL); - if (tcp) { - nir = nir_shader_clone(mem_ctx, tcp->program.nir); - } else { - const nir_shader_compiler_options *options = - ctx->Const.ShaderCompilerOptions[MESA_SHADER_TESS_CTRL].NirOptions; - nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key); - } - - memset(&prog_data, 0, sizeof(prog_data)); - - if (tcp) { - brw_assign_common_binding_table_offsets(devinfo, &tcp->program, - &prog_data.base.base, 0); - - brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tcp->program, - &prog_data.base.base, - compiler->scalar_stage[MESA_SHADER_TESS_CTRL]); - if (brw->can_push_ubos) { - brw_nir_analyze_ubo_ranges(compiler, nir, NULL, - prog_data.base.base.ubo_ranges); - } - } else { - /* Upload the Patch URB Header as the first two uniforms. - * Do the annoying scrambling so the shader doesn't have to. - */ - assert(nir->num_uniforms == 32); - prog_data.base.base.param = rzalloc_array(mem_ctx, uint32_t, 8); - prog_data.base.base.nr_params = 8; - - uint32_t *param = prog_data.base.base.param; - for (int i = 0; i < 8; i++) - param[i] = BRW_PARAM_BUILTIN_ZERO; - - if (key->tes_primitive_mode == GL_QUADS) { - for (int i = 0; i < 4; i++) - param[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i; - - param[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X; - param[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y; - } else if (key->tes_primitive_mode == GL_TRIANGLES) { - for (int i = 0; i < 3; i++) - param[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i; - - param[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X; - } else { - assert(key->tes_primitive_mode == GL_ISOLINES); - param[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y; - param[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X; - } - } - - int st_index = -1; - if (INTEL_DEBUG(DEBUG_SHADER_TIME) && tep) - st_index = brw_get_shader_time_index(brw, &tep->program, ST_TCS, true); - - if (unlikely(brw->perf_debug)) { - start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo); - start_time = get_time(); - } - - char *error_str; - const unsigned *program = - brw_compile_tcs(compiler, brw, mem_ctx, key, &prog_data, nir, st_index, - NULL, &error_str); - if (program == NULL) { - if (tep) { - tep->program.sh.data->LinkStatus = LINKING_FAILURE; - ralloc_strcat(&tep->program.sh.data->InfoLog, error_str); - } - - _mesa_problem(NULL, "Failed to compile tessellation control shader: " - "%s\n", error_str); - - ralloc_free(mem_ctx); - return false; - } - - if (unlikely(brw->perf_debug)) { - if (tcp) { - if (tcp->compiled_once) { - brw_debug_recompile(brw, MESA_SHADER_TESS_CTRL, tcp->program.Id, - &key->base); - } - tcp->compiled_once = true; - } - - if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { - perf_debug("TCS compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - } - - /* Scratch space is used for register spilling */ - brw_alloc_stage_scratch(brw, 
stage_state, - prog_data.base.base.total_scratch); - - /* The param and pull_param arrays will be freed by the shader cache. */ - ralloc_steal(NULL, prog_data.base.base.param); - ralloc_steal(NULL, prog_data.base.base.pull_param); - brw_upload_cache(&brw->cache, BRW_CACHE_TCS_PROG, - key, sizeof(*key), - program, prog_data.base.base.program_size, - &prog_data, sizeof(prog_data), - &stage_state->prog_offset, &brw->tcs.base.prog_data); - ralloc_free(mem_ctx); - - return true; -} - -void -brw_tcs_populate_key(struct brw_context *brw, - struct brw_tcs_prog_key *key) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct brw_compiler *compiler = brw->screen->compiler; - struct brw_program *tcp = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL]; - struct brw_program *tep = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL]; - struct gl_program *tes_prog = &tep->program; - - uint64_t per_vertex_slots = tes_prog->info.inputs_read; - uint32_t per_patch_slots = tes_prog->info.patch_inputs_read; - - memset(key, 0, sizeof(*key)); - - if (tcp) { - struct gl_program *prog = &tcp->program; - per_vertex_slots |= prog->info.outputs_written; - per_patch_slots |= prog->info.patch_outputs_written; - } - - if (devinfo->ver < 8 || !tcp || compiler->use_tcs_8_patch) - key->input_vertices = brw->ctx.TessCtrlProgram.patch_vertices; - key->outputs_written = per_vertex_slots; - key->patch_outputs_written = per_patch_slots; - - /* We need to specialize our code generation for tessellation levels - * based on the domain the DS is expecting to tessellate. - */ - key->tes_primitive_mode = tep->program.info.tess.primitive_mode; - key->quads_workaround = devinfo->ver < 9 && - tep->program.info.tess.primitive_mode == GL_QUADS && - tep->program.info.tess.spacing == TESS_SPACING_EQUAL; - - if (tcp) { - /* _NEW_TEXTURE */ - brw_populate_base_prog_key(&brw->ctx, tcp, &key->base); - } -} - -void -brw_upload_tcs_prog(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->tcs.base; - struct brw_tcs_prog_key key; - /* BRW_NEW_TESS_PROGRAMS */ - struct brw_program *tcp = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL]; - ASSERTED struct brw_program *tep = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL]; - assert(tep); - - if (!brw_state_dirty(brw, - _NEW_TEXTURE, - BRW_NEW_PATCH_PRIMITIVE | - BRW_NEW_TESS_PROGRAMS)) - return; - - brw_tcs_populate_key(brw, &key); - - if (brw_search_cache(&brw->cache, BRW_CACHE_TCS_PROG, &key, sizeof(key), - &stage_state->prog_offset, &brw->tcs.base.prog_data, - true)) - return; - - if (brw_disk_cache_upload_program(brw, MESA_SHADER_TESS_CTRL)) - return; - - tcp = (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL]; - if (tcp) - tcp->id = key.base.program_string_id; - - ASSERTED bool success = brw_codegen_tcs_prog(brw, tcp, tep, &key); - assert(success); -} - -void -brw_tcs_populate_default_key(const struct brw_compiler *compiler, - struct brw_tcs_prog_key *key, - struct gl_shader_program *sh_prog, - struct gl_program *prog) -{ - const struct intel_device_info *devinfo = compiler->devinfo; - struct brw_program *btcp = brw_program(prog); - const struct gl_linked_shader *tes = - sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; - - memset(key, 0, sizeof(*key)); - - brw_populate_default_base_prog_key(devinfo, btcp, &key->base); - - /* Guess that the input and output patches have the same dimensionality. 
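 *
 * (Both key-population helpers in this file memset() the key before filling
 * it in. That is load-bearing: the program cache hashes and compares keys
 * as raw bytes, so compiler-inserted padding must be zeroed for two
 * logically equal keys to match. A reduced sketch of the comparison, with
 * invented names, assuming <string.h> and <stdbool.h>:
 *
 *    struct example_key { uint8_t flag; uint64_t outputs_written; };
 *
 *    static bool
 *    key_equals(const struct example_key *a, const struct example_key *b)
 *    {
 *       // Safe only because every key was zero-initialized first;
 *       // otherwise the padding after 'flag' would compare garbage.
 *       return memcmp(a, b, sizeof(*a)) == 0;
 *    }
 * )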
*/ - if (devinfo->ver < 8 || compiler->use_tcs_8_patch) - key->input_vertices = prog->info.tess.tcs_vertices_out; - - if (tes) { - key->tes_primitive_mode = tes->Program->info.tess.primitive_mode; - key->quads_workaround = devinfo->ver < 9 && - tes->Program->info.tess.primitive_mode == GL_QUADS && - tes->Program->info.tess.spacing == TESS_SPACING_EQUAL; - } else { - key->tes_primitive_mode = GL_TRIANGLES; - } - - key->outputs_written = prog->nir->info.outputs_written; - key->patch_outputs_written = prog->nir->info.patch_outputs_written; -} - -bool -brw_tcs_precompile(struct gl_context *ctx, - struct gl_shader_program *shader_prog, - struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - const struct brw_compiler *compiler = brw->screen->compiler; - struct brw_tcs_prog_key key; - uint32_t old_prog_offset = brw->tcs.base.prog_offset; - struct brw_stage_prog_data *old_prog_data = brw->tcs.base.prog_data; - bool success; - - struct brw_program *btcp = brw_program(prog); - const struct gl_linked_shader *tes = - shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; - struct brw_program *btep = tes ? brw_program(tes->Program) : NULL; - - brw_tcs_populate_default_key(compiler, &key, shader_prog, prog); - - success = brw_codegen_tcs_prog(brw, btcp, btep, &key); - - brw->tcs.base.prog_offset = old_prog_offset; - brw->tcs.base.prog_data = old_prog_data; - - return success; -} diff --git a/src/mesa/drivers/dri/i965/brw_tcs_surface_state.c b/src/mesa/drivers/dri/i965/brw_tcs_surface_state.c deleted file mode 100644 index 73179c0..0000000 --- a/src/mesa/drivers/dri/i965/brw_tcs_surface_state.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "main/mtypes.h" -#include "program/prog_parameter.h" -#include "main/shaderapi.h" - -#include "brw_context.h" -#include "brw_state.h" - - -/* Creates a new TCS constant buffer reflecting the current TCS program's - * constants, if needed by the TCS program. - * - * Otherwise, constants go through the CURBEs using the brw_constant_buffer - * state atom. 
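 *
 * (brw_tcs_precompile() above shows the save/compile/restore idiom used by
 * every stage's precompile hook: warming the program cache must not disturb
 * the variant currently bound for drawing. Reduced to its shape, with
 * invented names:
 *
 *    struct stage_state { uint32_t prog_offset; void *prog_data; };
 *
 *    static bool
 *    precompile_stage(struct stage_state *st, bool (*codegen)(void *), void *arg)
 *    {
 *       uint32_t saved_offset = st->prog_offset;  // state the draw path reads
 *       void *saved_data = st->prog_data;
 *       bool ok = codegen(arg);                   // fills the cache, clobbers st
 *       st->prog_offset = saved_offset;           // restore the bound variant
 *       st->prog_data = saved_data;
 *       return ok;
 *    }
 * )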
- */ -static void -brw_upload_tcs_pull_constants(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->tcs.base; - - /* BRW_NEW_TESS_PROGRAMS */ - struct brw_program *tcp = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL]; - - if (!tcp) - return; - - /* BRW_NEW_TCS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data; - - _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_CTRL); - /* _NEW_PROGRAM_CONSTANTS */ - brw_upload_pull_constants(brw, BRW_NEW_TCS_CONSTBUF, &tcp->program, - stage_state, prog_data); -} - -const struct brw_tracked_state brw_tcs_pull_constants = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_TCS_PROG_DATA | - BRW_NEW_TESS_PROGRAMS, - }, - .emit = brw_upload_tcs_pull_constants, -}; - -static void -brw_upload_tcs_ubo_surfaces(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - /* _NEW_PROGRAM */ - struct gl_program *prog = - ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL]; - - /* BRW_NEW_TCS_PROG_DATA */ - struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data; - - brw_upload_ubo_surfaces(brw, prog, &brw->tcs.base, prog_data); -} - -const struct brw_tracked_state brw_tcs_ubo_surfaces = { - .dirty = { - .mesa = _NEW_PROGRAM, - .brw = BRW_NEW_BATCH | - BRW_NEW_TCS_PROG_DATA | - BRW_NEW_UNIFORM_BUFFER, - }, - .emit = brw_upload_tcs_ubo_surfaces, -}; - -static void -brw_upload_tcs_image_surfaces(struct brw_context *brw) -{ - /* BRW_NEW_TESS_PROGRAMS */ - const struct gl_program *tcp = brw->programs[MESA_SHADER_TESS_CTRL]; - - if (tcp) { - /* BRW_NEW_TCS_PROG_DATA, BRW_NEW_IMAGE_UNITS */ - brw_upload_image_surfaces(brw, tcp, &brw->tcs.base, - brw->tcs.base.prog_data); - } -} - -const struct brw_tracked_state brw_tcs_image_surfaces = { - .dirty = { - .brw = BRW_NEW_BATCH | - BRW_NEW_AUX_STATE | - BRW_NEW_IMAGE_UNITS | - BRW_NEW_TCS_PROG_DATA | - BRW_NEW_TESS_PROGRAMS, - }, - .emit = brw_upload_tcs_image_surfaces, -}; diff --git a/src/mesa/drivers/dri/i965/brw_tes.c b/src/mesa/drivers/dri/i965/brw_tes.c deleted file mode 100644 index 19114c5..0000000 --- a/src/mesa/drivers/dri/i965/brw_tes.c +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file brw_tes.c - * - * Tessellation evaluation shader state upload code. 
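 *
 * (The brw_tracked_state atoms in the previous file pair a dirty-bit
 * signature with an emit callback; state upload walks an atom list and
 * re-emits only the atoms whose signature intersects the accumulated dirty
 * bits. A reduced model, with invented names:
 *
 *    struct atom {
 *       uint64_t mesa_bits, brw_bits;   // what this atom depends on
 *       void (*emit)(void *brw);
 *    };
 *
 *    static void
 *    upload_dirty(void *brw, uint64_t mesa, uint64_t brw_dirty,
 *                 const struct atom *atoms, int count)
 *    {
 *       for (int i = 0; i < count; i++) {
 *          if ((atoms[i].mesa_bits & mesa) || (atoms[i].brw_bits & brw_dirty))
 *             atoms[i].emit(brw);       // a dependency changed: re-emit
 *       }
 *    }
 * )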
- */ - -#include "brw_context.h" -#include "compiler/brw_nir.h" -#include "brw_program.h" -#include "brw_state.h" -#include "program/prog_parameter.h" - -static bool -brw_codegen_tes_prog(struct brw_context *brw, - struct brw_program *tep, - struct brw_tes_prog_key *key) -{ - const struct brw_compiler *compiler = brw->screen->compiler; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_stage_state *stage_state = &brw->tes.base; - struct brw_tes_prog_data prog_data; - bool start_busy = false; - double start_time = 0; - - memset(&prog_data, 0, sizeof(prog_data)); - - void *mem_ctx = ralloc_context(NULL); - - nir_shader *nir = nir_shader_clone(mem_ctx, tep->program.nir); - - brw_assign_common_binding_table_offsets(devinfo, &tep->program, - &prog_data.base.base, 0); - - brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tep->program, - &prog_data.base.base, - compiler->scalar_stage[MESA_SHADER_TESS_EVAL]); - if (brw->can_push_ubos) { - brw_nir_analyze_ubo_ranges(compiler, nir, NULL, - prog_data.base.base.ubo_ranges); - } - - int st_index = -1; - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) - st_index = brw_get_shader_time_index(brw, &tep->program, ST_TES, true); - - if (unlikely(brw->perf_debug)) { - start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo); - start_time = get_time(); - } - - struct brw_vue_map input_vue_map; - brw_compute_tess_vue_map(&input_vue_map, key->inputs_read, - key->patch_inputs_read); - - char *error_str; - const unsigned *program = - brw_compile_tes(compiler, brw, mem_ctx, key, &input_vue_map, &prog_data, - nir, st_index, NULL, &error_str); - if (program == NULL) { - tep->program.sh.data->LinkStatus = LINKING_FAILURE; - ralloc_strcat(&tep->program.sh.data->InfoLog, error_str); - - _mesa_problem(NULL, "Failed to compile tessellation evaluation shader: " - "%s\n", error_str); - - ralloc_free(mem_ctx); - return false; - } - - if (unlikely(brw->perf_debug)) { - if (tep->compiled_once) { - brw_debug_recompile(brw, MESA_SHADER_TESS_EVAL, tep->program.Id, - &key->base); - } - if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { - perf_debug("TES compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - tep->compiled_once = true; - } - - /* Scratch space is used for register spilling */ - brw_alloc_stage_scratch(brw, stage_state, - prog_data.base.base.total_scratch); - - /* The param and pull_param arrays will be freed by the shader cache. */ - ralloc_steal(NULL, prog_data.base.base.param); - ralloc_steal(NULL, prog_data.base.base.pull_param); - brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG, - key, sizeof(*key), - program, prog_data.base.base.program_size, - &prog_data, sizeof(prog_data), - &stage_state->prog_offset, &brw->tes.base.prog_data); - ralloc_free(mem_ctx); - - return true; -} - -void -brw_tes_populate_key(struct brw_context *brw, - struct brw_tes_prog_key *key) -{ - struct brw_program *tcp = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL]; - struct brw_program *tep = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL]; - struct gl_program *prog = &tep->program; - - uint64_t per_vertex_slots = prog->info.inputs_read; - uint32_t per_patch_slots = prog->info.patch_inputs_read; - - memset(key, 0, sizeof(*key)); - - /* _NEW_TEXTURE */ - brw_populate_base_prog_key(&brw->ctx, tep, &key->base); - - /* The TCS may have additional outputs which aren't read by the - * TES (possibly for cross-thread communication). These need to - * be stored in the Patch URB Entry as well. 
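 *
 * (The combination done just below is the usual slot-bitmask idiom: OR the
 * consumer's reads with the producer's writes, while carving out the
 * tessellation levels, which live in the patch header rather than in
 * generic slots. A sketch with illustrative bit assignments:
 *
 *    #define SLOT_TESS_LEVEL_INNER (1ull << 0)
 *    #define SLOT_TESS_LEVEL_OUTER (1ull << 1)
 *
 *    static uint64_t
 *    combined_per_vertex_slots(uint64_t tes_reads, uint64_t tcs_writes)
 *    {
 *       // Tess levels must not inflate the per-vertex URB footprint.
 *       return tes_reads |
 *              (tcs_writes & ~(SLOT_TESS_LEVEL_INNER | SLOT_TESS_LEVEL_OUTER));
 *    }
 * )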
- */ - if (tcp) { - struct gl_program *tcp_prog = &tcp->program; - per_vertex_slots |= tcp_prog->info.outputs_written & - ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); - per_patch_slots |= tcp_prog->info.patch_outputs_written; - } - - key->inputs_read = per_vertex_slots; - key->patch_inputs_read = per_patch_slots; -} - -void -brw_upload_tes_prog(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->tes.base; - struct brw_tes_prog_key key; - /* BRW_NEW_TESS_PROGRAMS */ - struct brw_program *tep = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL]; - - if (!brw_state_dirty(brw, - _NEW_TEXTURE, - BRW_NEW_TESS_PROGRAMS)) - return; - - brw_tes_populate_key(brw, &key); - - if (brw_search_cache(&brw->cache, BRW_CACHE_TES_PROG, &key, sizeof(key), - &stage_state->prog_offset, &brw->tes.base.prog_data, - true)) - return; - - if (brw_disk_cache_upload_program(brw, MESA_SHADER_TESS_EVAL)) - return; - - tep = (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL]; - tep->id = key.base.program_string_id; - - ASSERTED bool success = brw_codegen_tes_prog(brw, tep, &key); - assert(success); -} - -void -brw_tes_populate_default_key(const struct brw_compiler *compiler, - struct brw_tes_prog_key *key, - struct gl_shader_program *sh_prog, - struct gl_program *prog) -{ - const struct intel_device_info *devinfo = compiler->devinfo; - struct brw_program *btep = brw_program(prog); - - memset(key, 0, sizeof(*key)); - - brw_populate_default_base_prog_key(devinfo, btep, &key->base); - - key->inputs_read = prog->nir->info.inputs_read; - key->patch_inputs_read = prog->nir->info.patch_inputs_read; - - if (sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]) { - struct gl_program *tcp = - sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program; - key->inputs_read |= tcp->nir->info.outputs_written & - ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); - key->patch_inputs_read |= tcp->nir->info.patch_outputs_written; - } -} - -bool -brw_tes_precompile(struct gl_context *ctx, - struct gl_shader_program *shader_prog, - struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - const struct brw_compiler *compiler = brw->screen->compiler; - struct brw_tes_prog_key key; - uint32_t old_prog_offset = brw->tes.base.prog_offset; - struct brw_stage_prog_data *old_prog_data = brw->tes.base.prog_data; - bool success; - - struct brw_program *btep = brw_program(prog); - - brw_tes_populate_default_key(compiler, &key, shader_prog, prog); - - success = brw_codegen_tes_prog(brw, btep, &key); - - brw->tes.base.prog_offset = old_prog_offset; - brw->tes.base.prog_data = old_prog_data; - - return success; -} diff --git a/src/mesa/drivers/dri/i965/brw_tes_surface_state.c b/src/mesa/drivers/dri/i965/brw_tes_surface_state.c deleted file mode 100644 index 6e9e58a..0000000 --- a/src/mesa/drivers/dri/i965/brw_tes_surface_state.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in 
all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "main/mtypes.h" -#include "program/prog_parameter.h" -#include "main/shaderapi.h" - -#include "brw_context.h" -#include "brw_state.h" - - -/* Creates a new TES constant buffer reflecting the current TES program's - * constants, if needed by the TES program. - * - * Otherwise, constants go through the CURBEs using the brw_constant_buffer - * state atom. - */ -static void -brw_upload_tes_pull_constants(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->tes.base; - - /* BRW_NEW_TESS_PROGRAMS */ - struct brw_program *dp = - (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL]; - - if (!dp) - return; - - /* BRW_NEW_TES_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data; - - _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_EVAL); - /* _NEW_PROGRAM_CONSTANTS */ - brw_upload_pull_constants(brw, BRW_NEW_TES_CONSTBUF, &dp->program, - stage_state, prog_data); -} - -const struct brw_tracked_state brw_tes_pull_constants = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_TES_PROG_DATA | - BRW_NEW_TESS_PROGRAMS, - }, - .emit = brw_upload_tes_pull_constants, -}; - -static void -brw_upload_tes_ubo_surfaces(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - /* _NEW_PROGRAM */ - struct gl_program *prog = - ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]; - - /* BRW_NEW_TES_PROG_DATA */ - struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data; - - brw_upload_ubo_surfaces(brw, prog, &brw->tes.base, prog_data); -} - -const struct brw_tracked_state brw_tes_ubo_surfaces = { - .dirty = { - .mesa = _NEW_PROGRAM, - .brw = BRW_NEW_BATCH | - BRW_NEW_TES_PROG_DATA | - BRW_NEW_UNIFORM_BUFFER, - }, - .emit = brw_upload_tes_ubo_surfaces, -}; - -static void -brw_upload_tes_image_surfaces(struct brw_context *brw) -{ - /* BRW_NEW_TESS_PROGRAMS */ - const struct gl_program *tep = brw->programs[MESA_SHADER_TESS_EVAL]; - - if (tep) { - /* BRW_NEW_TES_PROG_DATA, BRW_NEW_IMAGE_UNITS */ - brw_upload_image_surfaces(brw, tep, &brw->tes.base, - brw->tes.base.prog_data); - } -} - -const struct brw_tracked_state brw_tes_image_surfaces = { - .dirty = { - .brw = BRW_NEW_BATCH | - BRW_NEW_AUX_STATE | - BRW_NEW_IMAGE_UNITS | - BRW_NEW_TESS_PROGRAMS | - BRW_NEW_TES_PROG_DATA, - }, - .emit = brw_upload_tes_image_surfaces, -}; diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c deleted file mode 100644 index cbb4cd2..0000000 --- a/src/mesa/drivers/dri/i965/brw_tex.c +++ /dev/null @@ -1,415 +0,0 @@ -#include "swrast/swrast.h" -#include "main/renderbuffer.h" -#include "main/texobj.h" -#include "main/teximage.h" -#include "main/mipmap.h" -#include "drivers/common/meta.h" -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_buffer_objects.h" -#include "brw_mipmap_tree.h" -#include "brw_tex.h" -#include "brw_fbo.h" -#include "brw_state.h" -#include "util/u_memory.h" - -#define FILE_DEBUG_FLAG 
DEBUG_TEXTURE - -static struct gl_texture_image * -brw_new_texture_image(struct gl_context *ctx) -{ - DBG("%s\n", __func__); - (void) ctx; - return (struct gl_texture_image *) CALLOC_STRUCT(brw_texture_image); -} - -static void -brw_delete_texture_image(struct gl_context *ctx, struct gl_texture_image *img) -{ - /* nothing special (yet) for brw_texture_image */ - _mesa_delete_texture_image(ctx, img); -} - - -static struct gl_texture_object * -brw_new_texture_object(struct gl_context *ctx, GLuint name, GLenum target) -{ - struct brw_texture_object *obj = CALLOC_STRUCT(brw_texture_object); - - (void) ctx; - - DBG("%s\n", __func__); - - if (obj == NULL) - return NULL; - - _mesa_initialize_texture_object(ctx, &obj->base, name, target); - - obj->needs_validate = true; - - return &obj->base; -} - -static void -brw_delete_texture_object(struct gl_context *ctx, - struct gl_texture_object *texObj) -{ - struct brw_texture_object *brw_obj = brw_texture_object(texObj); - - brw_miptree_release(&brw_obj->mt); - _mesa_delete_texture_object(ctx, texObj); -} - -static GLboolean -brw_alloc_texture_image_buffer(struct gl_context *ctx, - struct gl_texture_image *image) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_texture_image *intel_image = brw_texture_image(image); - struct gl_texture_object *texobj = image->TexObject; - struct brw_texture_object *intel_texobj = brw_texture_object(texobj); - - assert(image->Border == 0); - - /* Quantize sample count */ - if (image->NumSamples) { - image->NumSamples = brw_quantize_num_samples(brw->screen, image->NumSamples); - if (!image->NumSamples) - return false; - } - - /* Because the driver uses AllocTextureImageBuffer() internally, it may end - * up mismatched with FreeTextureImageBuffer(), but that is safe to call - * multiple times. - */ - ctx->Driver.FreeTextureImageBuffer(ctx, image); - - if (!_swrast_init_texture_image(image)) - return false; - - if (intel_texobj->mt && - brw_miptree_match_image(intel_texobj->mt, image)) { - brw_miptree_reference(&intel_image->mt, intel_texobj->mt); - DBG("%s: alloc obj %p level %d %dx%dx%d using object's miptree %p\n", - __func__, texobj, image->Level, - image->Width, image->Height, image->Depth, intel_texobj->mt); - } else { - intel_image->mt = brw_miptree_create_for_teximage(brw, intel_texobj, - intel_image, - MIPTREE_CREATE_DEFAULT); - if (!intel_image->mt) - return false; - - /* Even if the object currently has a mipmap tree associated - * with it, this one is a more likely candidate to represent the - * whole object since our level didn't fit what was there - * before, and any lower levels would fit into our miptree. - */ - brw_miptree_reference(&intel_texobj->mt, intel_image->mt); - - DBG("%s: alloc obj %p level %d %dx%dx%d using new miptree %p\n", - __func__, texobj, image->Level, - image->Width, image->Height, image->Depth, intel_image->mt); - } - - intel_texobj->needs_validate = true; - - return true; -} - -/** - * ctx->Driver.AllocTextureStorage() handler. - * - * Compare this to _mesa_AllocTextureStorage_sw, which would call into - * brw_alloc_texture_image_buffer() above. 
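 *
 * (brw_miptree_reference()/brw_miptree_release(), used throughout this
 * file, implement shared ownership: each image level ends up pointing at
 * the same miptree as its texture object whenever they match. A reduced
 * refcount sketch, with invented names, assuming <stdlib.h>:
 *
 *    struct mt { int refcount; };
 *
 *    static void
 *    mt_release(struct mt **dst)
 *    {
 *       if (*dst && --(*dst)->refcount == 0)
 *          free(*dst);              // last reference frees the storage
 *       *dst = NULL;
 *    }
 *
 *    static void
 *    mt_reference(struct mt **dst, struct mt *src)
 *    {
 *       if (*dst == src)
 *          return;
 *       mt_release(dst);            // drop whatever we pointed at before
 *       if (src) {
 *          src->refcount++;
 *          *dst = src;
 *       }
 *    }
 * )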
- */ -static GLboolean -brw_alloc_texture_storage(struct gl_context *ctx, - struct gl_texture_object *texobj, - GLsizei levels, GLsizei width, - GLsizei height, GLsizei depth) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_texture_object *intel_texobj = brw_texture_object(texobj); - struct gl_texture_image *first_image = texobj->Image[0][0]; - int num_samples = brw_quantize_num_samples(brw->screen, - first_image->NumSamples); - const int numFaces = _mesa_num_tex_faces(texobj->Target); - int face; - int level; - - /* If the object's current miptree doesn't match what we need, make a new - * one. - */ - if (!intel_texobj->mt || - !brw_miptree_match_image(intel_texobj->mt, first_image) || - intel_texobj->mt->last_level != levels - 1) { - brw_miptree_release(&intel_texobj->mt); - - brw_get_image_dims(first_image, &width, &height, &depth); - intel_texobj->mt = brw_miptree_create(brw, texobj->Target, - first_image->TexFormat, - 0, levels - 1, - width, height, depth, - MAX2(num_samples, 1), - MIPTREE_CREATE_DEFAULT); - - if (intel_texobj->mt == NULL) { - return false; - } - } - - for (face = 0; face < numFaces; face++) { - for (level = 0; level < levels; level++) { - struct gl_texture_image *image = texobj->Image[face][level]; - struct brw_texture_image *intel_image = brw_texture_image(image); - - image->NumSamples = num_samples; - - _swrast_free_texture_image_buffer(ctx, image); - if (!_swrast_init_texture_image(image)) - return false; - - brw_miptree_reference(&intel_image->mt, intel_texobj->mt); - } - } - - /* The miptree is in a validated state, so no need to check later. */ - intel_texobj->needs_validate = false; - intel_texobj->validated_first_level = 0; - intel_texobj->validated_last_level = levels - 1; - intel_texobj->_Format = first_image->TexFormat; - - return true; -} - - -static void -brw_free_texture_image_buffer(struct gl_context * ctx, - struct gl_texture_image *texImage) -{ - struct brw_texture_image *brw_image = brw_texture_image(texImage); - - DBG("%s\n", __func__); - - brw_miptree_release(&brw_image->mt); - - _swrast_free_texture_image_buffer(ctx, texImage); -} - -/** - * Map texture memory/buffer into user space. - * Note: the region of interest parameters are ignored here. - * \param mode bitmask of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT - * \param mapOut returns start of mapping of region of interest - * \param rowStrideOut returns row stride in bytes - */ -static void -brw_map_texture_image(struct gl_context *ctx, - struct gl_texture_image *tex_image, - GLuint slice, - GLuint x, GLuint y, GLuint w, GLuint h, - GLbitfield mode, - GLubyte **map, - GLint *out_stride) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_texture_image *intel_image = brw_texture_image(tex_image); - struct brw_mipmap_tree *mt = intel_image->mt; - ptrdiff_t stride; - - /* Our texture data is always stored in a miptree. */ - assert(mt); - - /* Check that our caller wasn't confused about how to map a 1D texture. */ - assert(tex_image->TexObject->Target != GL_TEXTURE_1D_ARRAY || h == 1); - - /* brw_miptree_map operates on a unified "slice" number that references the - * cube face, since it's all just slices to the miptree code. 
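 *
 * (A caller of this hook must honor the returned stride, which reflects
 * tiling and alignment, not width * cpp. Typical hypothetical usage --
 * 'src', 'src_pitch' and 'cpp' stand in for caller state:
 *
 *    GLubyte *map;
 *    GLint stride;
 *    ctx->Driver.MapTextureImage(ctx, tex_image, slice, x, y, w, h,
 *                                GL_MAP_WRITE_BIT, &map, &stride);
 *    for (GLuint row = 0; row < h; row++)       // walk rows via the stride
 *       memcpy(map + (ptrdiff_t)row * stride,
 *              src + (size_t)row * src_pitch, (size_t)w * cpp);
 *    ctx->Driver.UnmapTextureImage(ctx, tex_image, slice);
 * )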
- */ - if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP) - slice = tex_image->Face; - - brw_miptree_map(brw, mt, - tex_image->Level + tex_image->TexObject->Attrib.MinLevel, - slice + tex_image->TexObject->Attrib.MinLayer, - x, y, w, h, mode, - (void **)map, &stride); - - *out_stride = stride; -} - -static void -brw_unmap_texture_image(struct gl_context *ctx, - struct gl_texture_image *tex_image, GLuint slice) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_texture_image *intel_image = brw_texture_image(tex_image); - struct brw_mipmap_tree *mt = intel_image->mt; - - if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP) - slice = tex_image->Face; - - brw_miptree_unmap(brw, mt, - tex_image->Level + tex_image->TexObject->Attrib.MinLevel, - slice + tex_image->TexObject->Attrib.MinLayer); -} - -static GLboolean -brw_texture_view(struct gl_context *ctx, - struct gl_texture_object *texObj, - struct gl_texture_object *origTexObj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_texture_object *intel_tex = brw_texture_object(texObj); - struct brw_texture_object *intel_orig_tex = brw_texture_object(origTexObj); - - assert(intel_orig_tex->mt); - brw_miptree_reference(&intel_tex->mt, intel_orig_tex->mt); - - /* Since we can only make views of immutable-format textures, - * we can assume that everything is in origTexObj's miptree. - * - * Mesa core has already made us a copy of all the teximage objects, - * except it hasn't copied our mt pointers, etc. - */ - const int numFaces = _mesa_num_tex_faces(texObj->Target); - const int numLevels = texObj->Attrib.NumLevels; - - int face; - int level; - - for (face = 0; face < numFaces; face++) { - for (level = 0; level < numLevels; level++) { - struct gl_texture_image *image = texObj->Image[face][level]; - struct brw_texture_image *intel_image = brw_texture_image(image); - - brw_miptree_reference(&intel_image->mt, intel_orig_tex->mt); - } - } - - /* The miptree is in a validated state, so no need to check later. */ - intel_tex->needs_validate = false; - intel_tex->validated_first_level = 0; - intel_tex->validated_last_level = numLevels - 1; - - /* Set the validated texture format, with the same adjustments that - * would have been applied to determine the underlying texture's - * mt->format. - */ - intel_tex->_Format = brw_depth_format_for_depthstencil_format( - brw_lower_compressed_format(brw, texObj->Image[0][0]->TexFormat)); - - return GL_TRUE; -} - -static void -brw_texture_barrier(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->ver >= 6) { - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_CS_STALL); - - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); - } else { - brw_emit_mi_flush(brw); - } -} - -/* Return the usual surface usage flags for the given format. 
*/ -static isl_surf_usage_flags_t -isl_surf_usage(mesa_format format) -{ - switch(_mesa_get_format_base_format(format)) { - case GL_DEPTH_COMPONENT: - return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT; - case GL_DEPTH_STENCIL: - return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT | - ISL_SURF_USAGE_TEXTURE_BIT; - case GL_STENCIL_INDEX: - return ISL_SURF_USAGE_STENCIL_BIT | ISL_SURF_USAGE_TEXTURE_BIT; - default: - return ISL_SURF_USAGE_RENDER_TARGET_BIT | ISL_SURF_USAGE_TEXTURE_BIT; - } -} - -static GLboolean -intel_texture_for_memory_object(struct gl_context *ctx, - struct gl_texture_object *tex_obj, - struct gl_memory_object *mem_obj, - GLsizei levels, GLsizei width, - GLsizei height, GLsizei depth, - GLuint64 offset) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_memory_object *intel_memobj = brw_memory_object(mem_obj); - struct brw_texture_object *intel_texobj = brw_texture_object(tex_obj); - struct gl_texture_image *image = tex_obj->Image[0][0]; - struct isl_surf surf; - - /* Only color formats are supported. */ - if (!_mesa_is_format_color_format(image->TexFormat)) - return GL_FALSE; - - isl_tiling_flags_t tiling_flags = ISL_TILING_ANY_MASK; - if (tex_obj->TextureTiling == GL_LINEAR_TILING_EXT) - tiling_flags = ISL_TILING_LINEAR_BIT; - - UNUSED const bool isl_surf_created_successfully = - isl_surf_init(&brw->screen->isl_dev, &surf, - .dim = get_isl_surf_dim(tex_obj->Target), - .format = brw_isl_format_for_mesa_format(image->TexFormat), - .width = width, - .height = height, - .depth = depth, - .levels = levels, - .array_len = tex_obj->Target == GL_TEXTURE_3D ? 1 : depth, - .samples = MAX2(image->NumSamples, 1), - .usage = isl_surf_usage(image->TexFormat), - .tiling_flags = tiling_flags); - - assert(isl_surf_created_successfully); - - intel_texobj->mt = brw_miptree_create_for_bo(brw, - intel_memobj->bo, - image->TexFormat, - offset, - width, - height, - depth, - surf.row_pitch_B, - surf.tiling, - MIPTREE_CREATE_NO_AUX); - assert(intel_texobj->mt); - brw_alloc_texture_image_buffer(ctx, image); - - intel_texobj->needs_validate = false; - intel_texobj->validated_first_level = 0; - intel_texobj->validated_last_level = levels - 1; - intel_texobj->_Format = image->TexFormat; - - return GL_TRUE; -} - -void -brw_init_texture_functions(struct dd_function_table *functions) -{ - functions->NewTextureObject = brw_new_texture_object; - functions->NewTextureImage = brw_new_texture_image; - functions->DeleteTextureImage = brw_delete_texture_image; - functions->DeleteTexture = brw_delete_texture_object; - functions->AllocTextureImageBuffer = brw_alloc_texture_image_buffer; - functions->FreeTextureImageBuffer = brw_free_texture_image_buffer; - functions->AllocTextureStorage = brw_alloc_texture_storage; - functions->MapTextureImage = brw_map_texture_image; - functions->UnmapTextureImage = brw_unmap_texture_image; - functions->TextureView = brw_texture_view; - functions->TextureBarrier = brw_texture_barrier; - functions->SetTextureStorageForMemoryObject = intel_texture_for_memory_object; -} diff --git a/src/mesa/drivers/dri/i965/brw_tex.h b/src/mesa/drivers/dri/i965/brw_tex.h deleted file mode 100644 index ee0837e..0000000 --- a/src/mesa/drivers/dri/i965/brw_tex.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef INTELTEX_INC -#define INTELTEX_INC - -#include "main/mtypes.h" -#include "main/formats.h" -#include "brw_context.h" -#include "brw_mipmap_tree.h" - -void brw_init_texture_functions(struct dd_function_table *functions); - -void brw_init_texture_image_functions(struct dd_function_table *functions); - -void brw_init_texture_copy_image_functions(struct dd_function_table *functs); - -void brw_init_copy_image_functions(struct dd_function_table *functions); - -void brw_set_texbuffer(__DRIcontext *pDRICtx, - GLint target, __DRIdrawable *pDraw); -void brw_set_texbuffer2(__DRIcontext *pDRICtx, - GLint target, GLint format, __DRIdrawable *pDraw); -void brw_release_texbuffer(__DRIcontext *pDRICtx, GLint target, - __DRIdrawable *dPriv); - -struct brw_mipmap_tree * -brw_miptree_create_for_teximage(struct brw_context *brw, - struct brw_texture_object *brw_obj, - struct brw_texture_image *brw_image, - enum brw_miptree_create_flags flags); - -void brw_finalize_mipmap_tree(struct brw_context *brw, - struct gl_texture_object *tex_obj); - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_tex_copy.c b/src/mesa/drivers/dri/i965/brw_tex_copy.c deleted file mode 100644 index 26c1fcd..0000000 --- a/src/mesa/drivers/dri/i965/brw_tex_copy.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "main/mtypes.h" -#include "main/enums.h" -#include "main/image.h" -#include "main/teximage.h" -#include "main/texobj.h" -#include "main/texstate.h" -#include "main/fbobject.h" - -#include "drivers/common/meta.h" - -#include "brw_screen.h" -#include "brw_mipmap_tree.h" -#include "brw_fbo.h" -#include "brw_tex.h" -#include "brw_context.h" - -#define FILE_DEBUG_FLAG DEBUG_TEXTURE - - -static void -brw_copytexsubimage(struct gl_context *ctx, GLuint dims, - struct gl_texture_image *texImage, - GLint xoffset, GLint yoffset, GLint slice, - struct gl_renderbuffer *rb, - GLint x, GLint y, - GLsizei width, GLsizei height) -{ - struct brw_context *brw = brw_context(ctx); - - /* Try BLORP first. It can handle almost everything. */ - if (brw_blorp_copytexsubimage(brw, rb, texImage, slice, x, y, - xoffset, yoffset, width, height)) - return; - - /* Finally, fall back to meta. This will likely be slow. */ - perf_debug("%s - fallback to swrast\n", __func__); - _mesa_meta_CopyTexSubImage(ctx, dims, texImage, - xoffset, yoffset, slice, - rb, x, y, width, height); -} - - -void -brw_init_texture_copy_image_functions(struct dd_function_table *functions) -{ - functions->CopyTexSubImage = brw_copytexsubimage; -} diff --git a/src/mesa/drivers/dri/i965/brw_tex_image.c b/src/mesa/drivers/dri/i965/brw_tex_image.c deleted file mode 100644 index 7abe848..0000000 --- a/src/mesa/drivers/dri/i965/brw_tex_image.c +++ /dev/null @@ -1,992 +0,0 @@ - -#include "main/macros.h" -#include "main/mtypes.h" -#include "main/enums.h" -#include "main/bufferobj.h" -#include "main/context.h" -#include "main/formats.h" -#include "main/glformats.h" -#include "main/image.h" -#include "main/pbo.h" -#include "main/renderbuffer.h" -#include "main/texcompress.h" -#include "main/texgetimage.h" -#include "main/texobj.h" -#include "main/teximage.h" -#include "main/texstore.h" -#include "main/glthread.h" - -#include "drivers/common/meta.h" - -#include "brw_mipmap_tree.h" -#include "brw_buffer_objects.h" -#include "brw_batch.h" -#include "brw_tex.h" -#include "brw_fbo.h" -#include "brw_image.h" -#include "brw_context.h" -#include "brw_blorp.h" - -#define FILE_DEBUG_FLAG DEBUG_TEXTURE - -/* Make sure one doesn't end up shrinking base level zero unnecessarily. - * Determining the base level dimension by shifting a higher level's dimension - * ends up with an off-by-one value when the base level has an NPOT size - * (for example, 293 != 146 << 1). - * Choose the original base level dimension when the shifted dimensions agree. - * Otherwise assume a real resize is intended and use the new shifted value. - */ -static unsigned -get_base_dim(unsigned old_base_dim, unsigned new_level_dim, unsigned level) -{ - const unsigned old_level_dim = old_base_dim >> level; - const unsigned new_base_dim = new_level_dim << level; - - return old_level_dim == new_level_dim ? old_base_dim : new_base_dim; -} - -/* Work back from the specified level of the image to the base level and create a - * miptree of that size. 
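 *
 * (Worked example for get_base_dim() above: with an NPOT base width of 293,
 * level 1 is 293 >> 1 = 146, and naively reconstructing the base as
 * 146 << 1 = 292 would shrink it by one. Hence:
 *
 *    assert(get_base_dim(293, 146, 1) == 293);  // shifted dims agree: keep 293
 *    assert(get_base_dim(293, 150, 1) == 300);  // mismatch: treat as a real resize
 * )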
- */ -struct brw_mipmap_tree * -brw_miptree_create_for_teximage(struct brw_context *brw, - struct brw_texture_object *brw_obj, - struct brw_texture_image *brw_image, - enum brw_miptree_create_flags flags) -{ - GLuint lastLevel; - int width, height, depth; - unsigned old_width = 0, old_height = 0, old_depth = 0; - const struct brw_mipmap_tree *old_mt = brw_obj->mt; - const unsigned level = brw_image->base.Base.Level; - - brw_get_image_dims(&brw_image->base.Base, &width, &height, &depth); - - if (old_mt) { - old_width = old_mt->surf.logical_level0_px.width; - old_height = old_mt->surf.logical_level0_px.height; - old_depth = old_mt->surf.dim == ISL_SURF_DIM_3D ? - old_mt->surf.logical_level0_px.depth : - old_mt->surf.logical_level0_px.array_len; - } - - DBG("%s\n", __func__); - - /* Figure out image dimensions at start level. */ - switch(brw_obj->base.Target) { - case GL_TEXTURE_2D_MULTISAMPLE: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - case GL_TEXTURE_RECTANGLE: - case GL_TEXTURE_EXTERNAL_OES: - assert(level == 0); - break; - case GL_TEXTURE_3D: - depth = old_mt ? get_base_dim(old_depth, depth, level) : - depth << level; - FALLTHROUGH; - case GL_TEXTURE_2D: - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_CUBE_MAP: - case GL_TEXTURE_CUBE_MAP_ARRAY: - height = old_mt ? get_base_dim(old_height, height, level) : - height << level; - FALLTHROUGH; - case GL_TEXTURE_1D: - case GL_TEXTURE_1D_ARRAY: - width = old_mt ? get_base_dim(old_width, width, level) : - width << level; - break; - default: - unreachable("Unexpected target"); - } - - /* Guess a reasonable value for lastLevel. This is probably going - * to be wrong fairly often and might mean that we have to look at - * resizable buffers, or require that buffers implement lazy - * pagetable arrangements. - */ - if ((brw_obj->base.Sampler.Attrib.MinFilter == GL_NEAREST || - brw_obj->base.Sampler.Attrib.MinFilter == GL_LINEAR) && - brw_image->base.Base.Level == 0 && - !brw_obj->base.Attrib.GenerateMipmap) { - lastLevel = 0; - } else { - lastLevel = _mesa_get_tex_max_num_levels(brw_obj->base.Target, - width, height, depth) - 1; - } - - return brw_miptree_create(brw, - brw_obj->base.Target, - brw_image->base.Base.TexFormat, - 0, - lastLevel, - width, - height, - depth, - MAX2(brw_image->base.Base.NumSamples, 1), - flags); -} - -static bool -brw_texsubimage_blorp(struct brw_context *brw, GLuint dims, - struct gl_texture_image *tex_image, - unsigned x, unsigned y, unsigned z, - unsigned width, unsigned height, unsigned depth, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing) -{ - struct brw_texture_image *intel_image = brw_texture_image(tex_image); - const unsigned mt_level = tex_image->Level + tex_image->TexObject->Attrib.MinLevel; - const unsigned mt_z = tex_image->TexObject->Attrib.MinLayer + tex_image->Face + z; - - /* The blorp path can't understand crazy format hackery */ - if (_mesa_base_tex_format(&brw->ctx, tex_image->InternalFormat) != - _mesa_get_format_base_format(tex_image->TexFormat)) - return false; - - return brw_blorp_upload_miptree(brw, intel_image->mt, tex_image->TexFormat, - mt_level, x, y, mt_z, width, height, depth, - tex_image->TexObject->Target, format, type, - pixels, packing); -} - -/** - * \brief A fast path for glTexImage and glTexSubImage. - * - * This fast path is taken when the texture format is BGRA, RGBA, - * A or L and when the texture memory is X- or Y-tiled. 
It uploads - * the texture data by mapping the texture memory without a GTT fence, thus - * acquiring a tiled view of the memory, and then copying successive - * spans within each tile. - * - * This is a performance win over the conventional texture upload path because - * it avoids the performance penalty of writing through the write-combine - * buffer. In the conventional texture upload path, - * texstore.c:store_texsubimage(), the texture memory is mapped through a GTT - * fence, thus acquiring a linear view of the memory, then each row in the - * image is memcpy'd. In this fast path, we replace each row's copy with - * a sequence of copies over each linear span in the tile. - * - * One use case is Google Chrome's paint rectangles. Chrome (as - * of version 21) renders each page as a tiling of 256x256 GL_BGRA textures. - * Each page's content is initially uploaded with glTexImage2D and damaged - * regions are updated with glTexSubImage2D. On some workloads, the - * performance gain of this fastpath on Sandybridge is over 5x. - */ -static bool -brw_texsubimage_tiled_memcpy(struct gl_context * ctx, - GLuint dims, - struct gl_texture_image *texImage, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, - const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_texture_image *image = brw_texture_image(texImage); - int src_pitch; - - /* The miptree's buffer. */ - struct brw_bo *bo; - - uint32_t cpp; - isl_memcpy_type copy_type; - - /* This fastpath is restricted to specific texture types: - * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support - * more types. - * - * FINISHME: The restrictions below on packing alignment and packing row - * length are likely unneeded now because we calculate the source stride - * with _mesa_image_row_stride. However, before removing the restrictions - * we need tests. - */ - if (!devinfo->has_llc || - !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) || - !(texImage->TexObject->Target == GL_TEXTURE_2D || - texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) || - pixels == NULL || - packing->BufferObj || - packing->Alignment > 4 || - packing->SkipPixels > 0 || - packing->SkipRows > 0 || - (packing->RowLength != 0 && packing->RowLength != width) || - packing->SwapBytes || - packing->LsbFirst || - packing->Invert) - return false; - - /* Only a simple blit, no scale, bias or other mapping. */ - if (ctx->_ImageTransferState) - return false; - - copy_type = brw_miptree_get_memcpy_type(texImage->TexFormat, format, type, - &cpp); - if (copy_type == ISL_MEMCPY_INVALID) - return false; - - /* If this is a nontrivial texture view, let another path handle it instead. */ - if (texImage->TexObject->Attrib.MinLayer) - return false; - - if (!image->mt || - (image->mt->surf.tiling != ISL_TILING_X && - image->mt->surf.tiling != ISL_TILING_Y0)) { - /* The algorithm is written only for X- or Y-tiled memory. */ - return false; - } - - /* linear_to_tiled() assumes that if the object is swizzled, it is using - * I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y. This is only - * true on gfx5 and above. - * - * The killer on top is that some gfx4 have an L-shaped swizzle mode, where - * parts of the memory aren't swizzled at all. Userspace just can't handle - * that. 
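 *
 * (For orientation, the address math behind the linear-to-tiled copy, shown
 * for X tiling only -- 512-byte-wide by 8-row tiles of 4 KiB -- and with
 * bit-6 swizzling ignored; a simplified sketch, not isl's implementation.
 * 'x_B' is the byte offset within a row:
 *
 *    static uint32_t
 *    xtile_offset(uint32_t x_B, uint32_t y, uint32_t row_pitch_B)
 *    {
 *       const uint32_t tile_w_B = 512, tile_h = 8;    // one X tile = 4 KiB
 *       uint32_t tiles_per_row = row_pitch_B / tile_w_B;
 *       uint32_t tile = (y / tile_h) * tiles_per_row + x_B / tile_w_B;
 *       return tile * 4096 + (y % tile_h) * tile_w_B + x_B % tile_w_B;
 *    }
 *
 * Copying whole 512-byte spans inside each tile keeps the writes sequential
 * in memory, which write-combining handles well.)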
- */ - if (devinfo->ver < 5 && devinfo->has_bit6_swizzle) - return false; - - int level = texImage->Level + texImage->TexObject->Attrib.MinLevel; - - /* Since we are going to write raw data to the miptree, we need to resolve - * any pending fast color clears before we start. - */ - assert(image->mt->surf.logical_level0_px.depth == 1); - assert(image->mt->surf.logical_level0_px.array_len == 1); - - brw_miptree_access_raw(brw, image->mt, level, 0, true); - - bo = image->mt->bo; - - if (brw_batch_references(&brw->batch, bo)) { - perf_debug("Flushing before mapping a referenced bo.\n"); - brw_batch_flush(brw); - } - - void *map = brw_bo_map(brw, bo, MAP_WRITE | MAP_RAW); - if (map == NULL) { - DBG("%s: failed to map bo\n", __func__); - return false; - } - - src_pitch = _mesa_image_row_stride(packing, width, format, type); - - /* We postponed printing this message until having committed to executing - * the function. - */ - DBG("%s: level=%d offset=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x " - "mesa_format=0x%x tiling=%d " - "packing=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d) ", - __func__, texImage->Level, xoffset, yoffset, width, height, - format, type, texImage->TexFormat, image->mt->surf.tiling, - packing->Alignment, packing->RowLength, packing->SkipPixels, - packing->SkipRows); - - /* Adjust x and y offset based on miplevel */ - unsigned level_x, level_y; - brw_miptree_get_image_offset(image->mt, level, 0, &level_x, &level_y); - xoffset += level_x; - yoffset += level_y; - - isl_memcpy_linear_to_tiled( - xoffset * cpp, (xoffset + width) * cpp, - yoffset, yoffset + height, - map, - pixels, - image->mt->surf.row_pitch_B, src_pitch, - devinfo->has_bit6_swizzle, - image->mt->surf.tiling, - copy_type - ); - - brw_bo_unmap(bo); - return true; -} - - -static void -brw_upload_tex(struct gl_context * ctx, - GLuint dims, - struct gl_texture_image *texImage, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, - const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_mipmap_tree *mt = brw_texture_image(texImage)->mt; - bool ok; - - /* Check that there is actually data to store. */ - if (pixels == NULL && !packing->BufferObj) - return; - - bool tex_busy = mt && - (brw_batch_references(&brw->batch, mt->bo) || brw_bo_busy(mt->bo)); - - if (packing->BufferObj || tex_busy || - mt->aux_usage == ISL_AUX_USAGE_CCS_E) { - ok = brw_texsubimage_blorp(brw, dims, texImage, - xoffset, yoffset, zoffset, - width, height, depth, format, type, - pixels, packing); - if (ok) - return; - } - - ok = brw_texsubimage_tiled_memcpy(ctx, dims, texImage, - xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, packing); - if (ok) - return; - - _mesa_store_texsubimage(ctx, dims, texImage, - xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, packing); -} - - -static void -brw_teximage(struct gl_context * ctx, - GLuint dims, - struct gl_texture_image *texImage, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *unpack) -{ - DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n", - __func__, _mesa_get_format_name(texImage->TexFormat), - _mesa_enum_to_string(texImage->TexObject->Target), - _mesa_enum_to_string(format), _mesa_enum_to_string(type), - texImage->Level, texImage->Width, texImage->Height, texImage->Depth); - - /* Allocate storage for texture data. 
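 *
 * (The path selection in brw_upload_tex() above reduces to one predicate:
 * take the GPU (BLORP) route whenever a CPU map would stall or cannot be
 * done cheaply. An illustrative restatement, not the driver's own helper:
 *
 *    static bool
 *    prefer_gpu_upload(bool src_is_pbo, bool bo_busy, bool ccs_compressed)
 *    {
 *       // A busy BO would stall the map until the GPU catches up; a PBO
 *       // source already lives in a BO; CCS_E surfaces would need a
 *       // resolve before raw CPU writes.
 *       return src_is_pbo || bo_busy || ccs_compressed;
 *    }
 *
 * If BLORP declines, the code still falls through to the tiled-memcpy and
 * generic store paths.)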
*/ - if (!ctx->Driver.AllocTextureImageBuffer(ctx, texImage)) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage%uD", dims); - return; - } - - assert(brw_texture_image(texImage)->mt); - - brw_upload_tex(ctx, dims, texImage, 0, 0, 0, - texImage->Width, texImage->Height, texImage->Depth, - format, type, pixels, unpack); -} - - -static void -brw_texsubimage(struct gl_context * ctx, - GLuint dims, - struct gl_texture_image *texImage, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, - const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing) -{ - DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n", - __func__, _mesa_get_format_name(texImage->TexFormat), - _mesa_enum_to_string(texImage->TexObject->Target), - _mesa_enum_to_string(format), _mesa_enum_to_string(type), - texImage->Level, texImage->Width, texImage->Height, texImage->Depth); - - brw_upload_tex(ctx, dims, texImage, xoffset, yoffset, zoffset, - width, height, depth, format, type, pixels, packing); -} - - -static void -brw_set_texture_image_mt(struct brw_context *brw, - struct gl_texture_image *image, - GLenum internal_format, - mesa_format format, - struct brw_mipmap_tree *mt) - -{ - struct gl_texture_object *texobj = image->TexObject; - struct brw_texture_object *intel_texobj = brw_texture_object(texobj); - struct brw_texture_image *intel_image = brw_texture_image(image); - - _mesa_init_teximage_fields(&brw->ctx, image, - mt->surf.logical_level0_px.width, - mt->surf.logical_level0_px.height, 1, - 0, internal_format, format); - - brw->ctx.Driver.FreeTextureImageBuffer(&brw->ctx, image); - - intel_texobj->needs_validate = true; - intel_image->base.RowStride = mt->surf.row_pitch_B / mt->cpp; - assert(mt->surf.row_pitch_B % mt->cpp == 0); - - brw_miptree_reference(&intel_image->mt, mt); - - /* Immediately validate the image to the object. */ - brw_miptree_reference(&intel_texobj->mt, mt); -} - - -void -brw_set_texbuffer2(__DRIcontext *pDRICtx, GLint target, - GLint texture_format, - __DRIdrawable *dPriv) -{ - struct gl_framebuffer *fb = dPriv->driverPrivate; - struct brw_context *brw = pDRICtx->driverPrivate; - struct gl_context *ctx = &brw->ctx; - struct brw_renderbuffer *rb; - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - mesa_format texFormat = MESA_FORMAT_NONE; - GLenum internal_format = 0; - - _mesa_glthread_finish(ctx); - - texObj = _mesa_get_current_tex_object(ctx, target); - - if (!texObj) - return; - - if (dPriv->lastStamp != dPriv->dri2.stamp || - !pDRICtx->driScreenPriv->dri2.useInvalidate) - brw_update_renderbuffers(pDRICtx, dPriv); - - rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT); - /* If the miptree isn't set, then brw_update_renderbuffers was unable - * to get the BO for the drawable from the window system. - */ - if (!rb || !rb->mt) - return; - - /* Neither the EGL nor the GLX texture_from_pixmap spec says anything about - * sRGB. They are both from a time when sRGB was considered an extra - * encoding step you did as part of rendering/blending and not a format. - * Even though we have a concept of sRGB visuals, X has classically assumed - * that your data is just bits and sRGB rendering is entirely a client-side - * rendering construct. The assumption is that the result of BindTexImage - * is a texture with a linear format even if it was rendered with sRGB - * encoding enabled. 
- */ - texFormat = _mesa_get_srgb_format_linear(brw_rb_format(rb)); - - if (rb->mt->cpp == 4) { - /* The extra texture_format parameter indicates whether the alpha - * channel should be respected or ignored. If we set internal_format to - * GL_RGB, the texture handling code is smart enough to swap the format - * or apply a swizzle if the underlying format is RGBA so we don't need - * to stomp it to RGBX or anything like that. - */ - if (texture_format == __DRI_TEXTURE_FORMAT_RGB) - internal_format = GL_RGB; - else - internal_format = GL_RGBA; - } else if (rb->mt->cpp == 2) { - internal_format = GL_RGB; - } - - brw_miptree_finish_external(brw, rb->mt); - - _mesa_lock_texture(&brw->ctx, texObj); - texImage = _mesa_get_tex_image(ctx, texObj, target, 0); - brw_set_texture_image_mt(brw, texImage, internal_format, - texFormat, rb->mt); - _mesa_unlock_texture(&brw->ctx, texObj); -} - -void -brw_release_texbuffer(__DRIcontext *pDRICtx, GLint target, - __DRIdrawable *dPriv) -{ - struct brw_context *brw = pDRICtx->driverPrivate; - struct gl_context *ctx = &brw->ctx; - struct gl_texture_object *tex_obj; - struct brw_texture_object *intel_tex; - - tex_obj = _mesa_get_current_tex_object(ctx, target); - if (!tex_obj) - return; - - _mesa_lock_texture(&brw->ctx, tex_obj); - - intel_tex = brw_texture_object(tex_obj); - if (!intel_tex->mt) { - _mesa_unlock_texture(&brw->ctx, tex_obj); - return; - } - - /* The brw_miptree_prepare_external below as well as the finish_external - * above in brw_set_texbuffer2 *should* do nothing. The BindTexImage call - * from both GLX and EGL has TexImage2D and not TexSubImage2D semantics so - * the texture is not immutable. This means that the user cannot create a - * texture view of the image with a different format. Since the only three - * formats available when using BindTexImage are all UNORM, we can never - * end up with an sRGB format being used for texturing and so we shouldn't - * get any format-related resolves when texturing from it. - * - * While very unlikely, it is possible that the client could use the bound - * texture with GL_ARB_image_load_store. In that case, we'll do a resolve - * but that's not actually a problem as it just means that we lose - * compression on this texture until the next time it's used as a render - * target. - * - * The only other way we could end up with an unexpected aux usage would be - * if we rendered to the image from the same context as we have it bound as - * a texture between BindTexImage and ReleaseTexImage. However, the spec - * clearly calls this case out and says you shouldn't do that. It doesn't - * explicitly prevent binding the texture to a framebuffer but it says the - * results of trying to render to it while bound are undefined. - * - * Just to keep everything safe and sane, we do a prepare_external but it - * should be a no-op in almost all cases. On the off chance that someone - * ever triggers this, we should at least warn them. - */ - if (intel_tex->mt->aux_buf && - brw_miptree_get_aux_state(intel_tex->mt, 0, 0) != - isl_drm_modifier_get_default_aux_state(intel_tex->mt->drm_modifier)) { - _mesa_warning(ctx, "Aux state changed between BindTexImage and " - "ReleaseTexImage. 
Most likely someone tried to draw " - "to the pixmap bound in BindTexImage or used it with " - "image_load_store."); - } - - brw_miptree_prepare_external(brw, intel_tex->mt); - - _mesa_unlock_texture(&brw->ctx, tex_obj); -} - -static GLboolean -brw_bind_renderbuffer_tex_image(struct gl_context *ctx, - struct gl_renderbuffer *rb, - struct gl_texture_image *image) -{ - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - struct brw_texture_image *intel_image = brw_texture_image(image); - struct gl_texture_object *texobj = image->TexObject; - struct brw_texture_object *intel_texobj = brw_texture_object(texobj); - - /* We can only handle RB allocated with AllocRenderbufferStorage, or - * window-system renderbuffers. - */ - assert(!rb->TexImage); - - if (!irb->mt) - return false; - - _mesa_lock_texture(ctx, texobj); - _mesa_init_teximage_fields(ctx, image, rb->Width, rb->Height, 1, 0, - rb->InternalFormat, rb->Format); - image->NumSamples = rb->NumSamples; - - brw_miptree_reference(&intel_image->mt, irb->mt); - - /* Immediately validate the image to the object. */ - brw_miptree_reference(&intel_texobj->mt, intel_image->mt); - - intel_texobj->needs_validate = true; - _mesa_unlock_texture(ctx, texobj); - - return true; -} - -void -brw_set_texbuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) -{ - /* The old interface didn't have the format argument, so copy our - * implementation's behavior at the time. - */ - brw_set_texbuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv); -} - -static void -brw_image_target_texture(struct gl_context *ctx, GLenum target, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage, - GLeglImageOES image_handle, - bool storage) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_mipmap_tree *mt; - __DRIscreen *dri_screen = brw->screen->driScrnPriv; - __DRIimage *image; - - image = dri_screen->dri2.image->lookupEGLImage(dri_screen, image_handle, - dri_screen->loaderPrivate); - if (image == NULL) - return; - - /* Disallow depth/stencil textures: we don't have a way to pass the - * separate stencil miptree of a GL_DEPTH_STENCIL texture through. - */ - if (image->has_depthstencil) { - _mesa_error(ctx, GL_INVALID_OPERATION, __func__); - return; - } - - mt = brw_miptree_create_for_dri_image(brw, image, target, image->format, - false); - if (mt == NULL) - return; - - struct brw_texture_object *intel_texobj = brw_texture_object(texObj); - intel_texobj->planar_format = image->planar_format; - intel_texobj->yuv_color_space = image->yuv_color_space; - - GLenum internal_format = - image->internal_format != 0 ? - image->internal_format : _mesa_get_format_base_format(mt->format); - - /* Fix the internal format when _mesa_get_format_base_format(mt->format) - * isn't a valid one for that particular format. - */ - if (brw->mesa_format_supports_render[image->format]) { - if (image->format == MESA_FORMAT_R10G10B10A2_UNORM || - image->format == MESA_FORMAT_R10G10B10X2_UNORM || - image->format == MESA_FORMAT_B10G10R10A2_UNORM || - image->format == MESA_FORMAT_B10G10R10X2_UNORM) - internal_format = GL_RGB10_A2; - } - - /* Guess sized internal format for dma-bufs, as specified by - * EXT_EGL_image_storage. 
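For context, the lookupEGLImage() hook used above is the driver half of the client-visible EGLImage path. A client binds an EGLImage to a texture roughly like this (GLES2/EGL sketch; error handling omitted, and `dpy`/`img` are assumed to have been created elsewhere):

#define GL_GLEXT_PROTOTYPES /* portable code would use eglGetProcAddress */
#include <EGL/egl.h>
#include <EGL/eglext.h>
#include <GLES2/gl2.h>
#include <GLES2/gl2ext.h>

/* Bind an existing EGLImage as the storage of a GLES texture. */
static GLuint
texture_from_eglimage(EGLDisplay dpy, EGLImageKHR img)
{
   (void)dpy;
   GLuint tex;
   glGenTextures(1, &tex);
   glBindTexture(GL_TEXTURE_2D, tex);
   /* This call lands in brw_image_target_texture_2d() in the driver. */
   glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, (GLeglImageOES)img);
   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
   return tex;
}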
- */ - if (storage && target == GL_TEXTURE_2D && image->imported_dmabuf) { - internal_format = driGLFormatToSizedInternalGLFormat(image->format); - if (internal_format == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, __func__); - return; - } - } - - brw_set_texture_image_mt(brw, texImage, internal_format, mt->format, mt); - brw_miptree_release(&mt); -} - -static void -brw_image_target_texture_2d(struct gl_context *ctx, GLenum target, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage, - GLeglImageOES image_handle) -{ - brw_image_target_texture(ctx, target, texObj, texImage, image_handle, - false); -} - -static void -brw_image_target_tex_storage(struct gl_context *ctx, GLenum target, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage, - GLeglImageOES image_handle) -{ - struct brw_texture_object *intel_texobj = brw_texture_object(texObj); - brw_image_target_texture(ctx, target, texObj, texImage, image_handle, - true); - - /* The miptree is in a validated state, so no need to check later. */ - intel_texobj->needs_validate = false; - intel_texobj->validated_first_level = 0; - intel_texobj->validated_last_level = 0; - intel_texobj->_Format = texImage->TexFormat; -} - -static bool -brw_gettexsubimage_blorp(struct brw_context *brw, - struct gl_texture_image *tex_image, - unsigned x, unsigned y, unsigned z, - unsigned width, unsigned height, unsigned depth, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing) -{ - struct brw_texture_image *intel_image = brw_texture_image(tex_image); - const unsigned mt_level = tex_image->Level + tex_image->TexObject->Attrib.MinLevel; - const unsigned mt_z = tex_image->TexObject->Attrib.MinLayer + tex_image->Face + z; - - /* The blorp path can't understand crazy format hackery */ - if (_mesa_base_tex_format(&brw->ctx, tex_image->InternalFormat) != - _mesa_get_format_base_format(tex_image->TexFormat)) - return false; - - return brw_blorp_download_miptree(brw, intel_image->mt, - tex_image->TexFormat, SWIZZLE_XYZW, - mt_level, x, y, mt_z, - width, height, depth, - tex_image->TexObject->Target, - format, type, false, pixels, packing); -} - -/** - * \brief A fast path for glGetTexImage. - * - * \see brw_readpixels_tiled_memcpy() - */ -static bool -brw_gettexsubimage_tiled_memcpy(struct gl_context *ctx, - struct gl_texture_image *texImage, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, GLenum type, - GLvoid *pixels, - const struct gl_pixelstore_attrib *packing) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_texture_image *image = brw_texture_image(texImage); - int dst_pitch; - - /* The miptree's buffer. */ - struct brw_bo *bo; - - uint32_t cpp; - isl_memcpy_type copy_type; - - /* This fastpath is restricted to specific texture types: - * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support - * more types. - * - * FINISHME: The restrictions below on packing alignment and packing row - * length are likely unneeded now because we calculate the destination stride - * with _mesa_image_row_stride. However, before removing the restrictions - * we need tests. 
*/
-   if (!devinfo->has_llc ||
-       !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
-       !(texImage->TexObject->Target == GL_TEXTURE_2D ||
-         texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
-       pixels == NULL ||
-       packing->BufferObj ||
-       packing->Alignment > 4 ||
-       packing->SkipPixels > 0 ||
-       packing->SkipRows > 0 ||
-       (packing->RowLength != 0 && packing->RowLength != width) ||
-       packing->SwapBytes ||
-       packing->LsbFirst ||
-       packing->Invert)
-      return false;
-
-   /* We can't handle copying from RGBX or BGRX because the tiled_memcpy
-    * function doesn't set the last channel to 1. Note this checks BaseFormat
-    * rather than TexFormat in case the RGBX format is being simulated with
-    * an RGBA format.
-    */
-   if (texImage->_BaseFormat == GL_RGB)
-      return false;
-
-   copy_type = brw_miptree_get_memcpy_type(texImage->TexFormat, format, type,
-                                           &cpp);
-   if (copy_type == ISL_MEMCPY_INVALID)
-      return false;
-
-   /* If this is a nontrivial texture view, let another path handle it instead. */
-   if (texImage->TexObject->Attrib.MinLayer)
-      return false;
-
-   if (!image->mt ||
-       (image->mt->surf.tiling != ISL_TILING_X &&
-        image->mt->surf.tiling != ISL_TILING_Y0)) {
-      /* The algorithm is written only for X- or Y-tiled memory. */
-      return false;
-   }
-
-   /* isl_memcpy_tiled_to_linear() assumes that if the object is swizzled,
-    * it is using I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y.
-    * This is only true on gfx5 and above.
-    *
-    * The killer on top is that some gfx4 parts have an L-shaped swizzle
-    * mode, where parts of the memory aren't swizzled at all. Userspace just
-    * can't handle that.
-    */
-   if (devinfo->ver < 5 && devinfo->has_bit6_swizzle)
-      return false;
-
-   int level = texImage->Level + texImage->TexObject->Attrib.MinLevel;
-
-   /* Since we are going to read raw data from the miptree, we need to
-    * resolve any pending fast color clears before we start.
- */ - assert(image->mt->surf.logical_level0_px.depth == 1); - assert(image->mt->surf.logical_level0_px.array_len == 1); - - brw_miptree_access_raw(brw, image->mt, level, 0, true); - - bo = image->mt->bo; - - if (brw_batch_references(&brw->batch, bo)) { - perf_debug("Flushing before mapping a referenced bo.\n"); - brw_batch_flush(brw); - } - - void *map = brw_bo_map(brw, bo, MAP_READ | MAP_RAW); - if (map == NULL) { - DBG("%s: failed to map bo\n", __func__); - return false; - } - - dst_pitch = _mesa_image_row_stride(packing, width, format, type); - - DBG("%s: level=%d x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x " - "mesa_format=0x%x tiling=%d " - "packing=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n", - __func__, texImage->Level, xoffset, yoffset, width, height, - format, type, texImage->TexFormat, image->mt->surf.tiling, - packing->Alignment, packing->RowLength, packing->SkipPixels, - packing->SkipRows); - - /* Adjust x and y offset based on miplevel */ - unsigned level_x, level_y; - brw_miptree_get_image_offset(image->mt, level, 0, &level_x, &level_y); - xoffset += level_x; - yoffset += level_y; - - isl_memcpy_tiled_to_linear( - xoffset * cpp, (xoffset + width) * cpp, - yoffset, yoffset + height, - pixels, - map, - dst_pitch, image->mt->surf.row_pitch_B, - devinfo->has_bit6_swizzle, - image->mt->surf.tiling, - copy_type - ); - - brw_bo_unmap(bo); - return true; -} - -static void -brw_get_tex_sub_image(struct gl_context *ctx, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLint depth, - GLenum format, GLenum type, GLvoid *pixels, - struct gl_texture_image *texImage) -{ - struct brw_context *brw = brw_context(ctx); - bool ok; - - DBG("%s\n", __func__); - - if (ctx->Pack.BufferObj) { - if (brw_gettexsubimage_blorp(brw, texImage, - xoffset, yoffset, zoffset, - width, height, depth, format, type, - pixels, &ctx->Pack)) - return; - - perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__); - } - - ok = brw_gettexsubimage_tiled_memcpy(ctx, texImage, xoffset, yoffset, - width, height, - format, type, pixels, &ctx->Pack); - - if(ok) - return; - - _mesa_meta_GetTexSubImage(ctx, xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, texImage); - - DBG("%s - DONE\n", __func__); -} - -static void -flush_astc_denorms(struct gl_context *ctx, GLuint dims, - struct gl_texture_image *texImage, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth) -{ - struct compressed_pixelstore store; - _mesa_compute_compressed_pixelstore(dims, texImage->TexFormat, - width, height, depth, - &ctx->Unpack, &store); - - for (int slice = 0; slice < store.CopySlices; slice++) { - - /* Map dest texture buffer */ - GLubyte *dstMap; - GLint dstRowStride; - ctx->Driver.MapTextureImage(ctx, texImage, slice + zoffset, - xoffset, yoffset, width, height, - GL_MAP_READ_BIT | GL_MAP_WRITE_BIT, - &dstMap, &dstRowStride); - if (!dstMap) - continue; - - for (int i = 0; i < store.CopyRowsPerSlice; i++) { - - /* An ASTC block is stored in little endian mode. The byte that - * contains bits 0..7 is stored at the lower address in memory. 
- */ - struct astc_void_extent { - uint16_t header : 12; - uint16_t dontcare[3]; - uint16_t R; - uint16_t G; - uint16_t B; - uint16_t A; - } *blocks = (struct astc_void_extent*) dstMap; - - /* Iterate over every copied block in the row */ - for (int j = 0; j < store.CopyBytesPerRow / 16; j++) { - - /* Check if the header matches that of an LDR void-extent block */ - if (blocks[j].header == 0xDFC) { - - /* Flush UNORM16 values that would be denormalized */ - if (blocks[j].A < 4) blocks[j].A = 0; - if (blocks[j].B < 4) blocks[j].B = 0; - if (blocks[j].G < 4) blocks[j].G = 0; - if (blocks[j].R < 4) blocks[j].R = 0; - } - } - - dstMap += dstRowStride; - } - - ctx->Driver.UnmapTextureImage(ctx, texImage, slice + zoffset); - } -} - - -static void -brw_compressedtexsubimage(struct gl_context *ctx, GLuint dims, - struct gl_texture_image *texImage, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, - GLsizei imageSize, const GLvoid *data) -{ - /* Upload the compressed data blocks */ - _mesa_store_compressed_texsubimage(ctx, dims, texImage, - xoffset, yoffset, zoffset, - width, height, depth, - format, imageSize, data); - - /* Fix up copied ASTC blocks if necessary */ - GLenum gl_format = _mesa_compressed_format_to_glenum(ctx, - texImage->TexFormat); - bool is_linear_astc = _mesa_is_astc_format(gl_format) && - !_mesa_is_srgb_format(gl_format); - struct brw_context *brw = (struct brw_context*) ctx; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - if (devinfo->ver == 9 && - !intel_device_info_is_9lp(devinfo) && - is_linear_astc) - flush_astc_denorms(ctx, dims, texImage, - xoffset, yoffset, zoffset, - width, height, depth); -} - -void -brw_init_texture_image_functions(struct dd_function_table *functions) -{ - functions->TexImage = brw_teximage; - functions->TexSubImage = brw_texsubimage; - functions->CompressedTexSubImage = brw_compressedtexsubimage; - functions->EGLImageTargetTexture2D = brw_image_target_texture_2d; - functions->EGLImageTargetTexStorage = brw_image_target_tex_storage; - functions->BindRenderbufferTexImage = brw_bind_renderbuffer_tex_image; - functions->GetTexSubImage = brw_get_tex_sub_image; -} diff --git a/src/mesa/drivers/dri/i965/brw_tex_obj.h b/src/mesa/drivers/dri/i965/brw_tex_obj.h deleted file mode 100644 index 7946851..0000000 --- a/src/mesa/drivers/dri/i965/brw_tex_obj.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
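flush_astc_denorms() above patches LDR void-extent blocks in place through a bitfield overlay. The same check can be written against a raw 16-byte block, which makes the word layout easier to see; this rewrite is for illustration only and assumes the little-endian layout described in the comment above:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Flush would-be-denormal UNORM16 channels of one 16-byte ASTC block
 * if it is an LDR void-extent block (12-bit header == 0xDFC). */
static bool
flush_void_extent_denorms(uint8_t block[16])
{
   uint16_t words[8];
   memcpy(words, block, 16);          /* assumes a little-endian host */

   if ((words[0] & 0x0FFF) != 0xDFC)  /* not a void-extent block */
      return false;

   for (int ch = 4; ch < 8; ch++) {   /* words 4..7 hold R, G, B, A */
      if (words[ch] < 4)
         words[ch] = 0;
   }
   memcpy(block, words, 16);
   return true;
}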
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef _BRW_TEX_OBJ_H -#define _BRW_TEX_OBJ_H - -#include "swrast/s_context.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct brw_texture_object -{ - struct gl_texture_object base; - - /* This is a mirror of base._MaxLevel, updated at validate time, - * except that we don't bother with the non-base levels for - * non-mipmapped textures. - */ - unsigned int _MaxLevel; - - unsigned int validated_first_level; - unsigned int validated_last_level; - - /* The miptree of pixel data for the texture (if !needs_validate). After - * validation, the images will also have references to the same mt. - */ - struct brw_mipmap_tree *mt; - - /** - * Set when mipmap trees in the texture images of this texture object - * might not all be the mipmap tree above. - */ - bool needs_validate; - - /* Mesa format for the validated texture object. For non-views this - * will always be the same as texObj->Image[0][0].TexFormat. For views, it - * may differ since the mt is shared across views with differing formats. - */ - mesa_format _Format; - - const struct brw_image_format *planar_format; - unsigned int yuv_color_space; -}; - - -/** - * brw_texture_image is a subclass of swrast_texture_image because we - * sometimes fall back to using the swrast module for software rendering. - */ -struct brw_texture_image -{ - struct swrast_texture_image base; - - /* If brw_image->mt != NULL, image data is stored here. - * Else if brw_image->base.Buffer != NULL, image is stored there. - * Else there is no image data. - */ - struct brw_mipmap_tree *mt; -}; - -static inline struct brw_texture_object * -brw_texture_object(struct gl_texture_object *obj) -{ - return (struct brw_texture_object *) obj; -} - -static inline struct brw_texture_image * -brw_texture_image(struct gl_texture_image *img) -{ - return (struct brw_texture_image *) img; -} - -#ifdef __cplusplus -} -#endif - -#endif /* _BRW_TEX_OBJ_H */ diff --git a/src/mesa/drivers/dri/i965/brw_tex_validate.c b/src/mesa/drivers/dri/i965/brw_tex_validate.c deleted file mode 100644 index 36803cc..0000000 --- a/src/mesa/drivers/dri/i965/brw_tex_validate.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "main/mtypes.h" -#include "main/macros.h" -#include "main/samplerobj.h" -#include "main/teximage.h" -#include "main/texobj.h" - -#include "brw_context.h" -#include "brw_mipmap_tree.h" -#include "brw_tex.h" - -#define FILE_DEBUG_FLAG DEBUG_TEXTURE - -/** - * Sets our driver-specific variant of tObj->_MaxLevel for later surface state - * upload. - * - * If we're only ensuring that there is storage for the first miplevel of a - * texture, then in texture setup we're going to have to make sure we don't - * allow sampling beyond level 0. - */ -static void -brw_update_max_level(struct gl_texture_object *tObj, - struct gl_sampler_object *sampler) -{ - struct brw_texture_object *brw_obj = brw_texture_object(tObj); - - if (!tObj->_MipmapComplete || - (tObj->_RenderToTexture && - (sampler->Attrib.MinFilter == GL_NEAREST || - sampler->Attrib.MinFilter == GL_LINEAR))) { - brw_obj->_MaxLevel = tObj->Attrib.BaseLevel; - } else { - brw_obj->_MaxLevel = tObj->_MaxLevel; - } -} - -/** - * At rendering-from-a-texture time, make sure that the texture object has a - * miptree that can hold the entire texture based on - * BaseLevel/MaxLevel/filtering, and copy in any texture images that are - * stored in other miptrees. - */ -void -brw_finalize_mipmap_tree(struct brw_context *brw, - struct gl_texture_object *tObj) -{ - struct brw_texture_object *brw_obj = brw_texture_object(tObj); - GLuint face, i; - GLuint nr_faces = 0; - struct brw_texture_image *firstImage; - int width, height, depth; - - /* TBOs require no validation -- they always just point to their BO. */ - if (tObj->Target == GL_TEXTURE_BUFFER) - return; - - /* What levels does this validated texture image require? */ - int validate_first_level = tObj->Attrib.BaseLevel; - int validate_last_level = brw_obj->_MaxLevel; - - /* Skip the loop over images in the common case of no images having - * changed. But if the GL_BASE_LEVEL or GL_MAX_LEVEL change to something we - * haven't looked at, then we do need to look at those new images. - */ - if (!brw_obj->needs_validate && - validate_first_level >= brw_obj->validated_first_level && - validate_last_level <= brw_obj->validated_last_level) { - return; - } - - /* On recent generations, immutable textures should not get this far - * -- they should have been created in a validated state, and nothing - * can invalidate them. - * - * Unfortunately, this is not true on pre-Sandybridge hardware -- when - * rendering into an immutable-format depth texture we may have to rebase - * the rendered levels to meet alignment requirements. - * - * FINISHME: Avoid doing this. - */ - assert(!tObj->Immutable || brw->screen->devinfo.ver < 6); - - firstImage = brw_texture_image(tObj->Image[0][tObj->Attrib.BaseLevel]); - if (!firstImage) - return; - - /* Check tree can hold all active levels. Check tree matches - * target, imageFormat, etc. 
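brw_update_max_level() above collapses the sampled range to the base level whenever the mipmap chain is incomplete or the min filter can never read past level 0. The rule, restated as a standalone predicate (the boolean inputs stand in for the texture and sampler state):

#include <assert.h>
#include <stdbool.h>

/* True when sampling can only ever touch the base level, mirroring the
 * condition in brw_update_max_level(). */
static bool
base_level_only(bool mipmap_complete, bool render_to_texture,
                bool min_filter_is_unmipped)
{
   return !mipmap_complete ||
          (render_to_texture && min_filter_is_unmipped);
}

int main(void)
{
   /* GL_NEAREST/GL_LINEAR min filters never read other levels. */
   assert(base_level_only(true, true, true));
   assert(!base_level_only(true, false, true));
   return 0;
}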
- */ - if (brw_obj->mt && - (!brw_miptree_match_image(brw_obj->mt, &firstImage->base.Base) || - validate_first_level < brw_obj->mt->first_level || - validate_last_level > brw_obj->mt->last_level)) { - brw_miptree_release(&brw_obj->mt); - } - - - /* May need to create a new tree: - */ - if (!brw_obj->mt) { - const unsigned level = firstImage->base.Base.Level; - brw_get_image_dims(&firstImage->base.Base, &width, &height, &depth); - /* Figure out image dimensions at start level. */ - switch(brw_obj->base.Target) { - case GL_TEXTURE_2D_MULTISAMPLE: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - case GL_TEXTURE_RECTANGLE: - case GL_TEXTURE_EXTERNAL_OES: - assert(level == 0); - break; - case GL_TEXTURE_3D: - depth = depth << level; - FALLTHROUGH; - case GL_TEXTURE_2D: - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_CUBE_MAP: - case GL_TEXTURE_CUBE_MAP_ARRAY: - height = height << level; - FALLTHROUGH; - case GL_TEXTURE_1D: - case GL_TEXTURE_1D_ARRAY: - width = width << level; - break; - default: - unreachable("Unexpected target"); - } - perf_debug("Creating new %s %dx%dx%d %d-level miptree to handle " - "finalized texture miptree.\n", - _mesa_get_format_name(firstImage->base.Base.TexFormat), - width, height, depth, validate_last_level + 1); - - brw_obj->mt = brw_miptree_create(brw, - brw_obj->base.Target, - firstImage->base.Base.TexFormat, - 0, /* first_level */ - validate_last_level, - width, - height, - depth, - 1 /* num_samples */, - MIPTREE_CREATE_BUSY); - if (!brw_obj->mt) - return; - } - - /* Pull in any images not in the object's tree: - */ - nr_faces = _mesa_num_tex_faces(brw_obj->base.Target); - for (face = 0; face < nr_faces; face++) { - for (i = validate_first_level; i <= validate_last_level; i++) { - struct brw_texture_image *brw_image = - brw_texture_image(brw_obj->base.Image[face][i]); - /* skip too small size mipmap */ - if (brw_image == NULL) - break; - - if (brw_obj->mt != brw_image->mt) - brw_miptree_copy_teximage(brw, brw_image, brw_obj->mt); - - /* After we're done, we'd better agree that our layout is - * appropriate, or we'll end up hitting this function again on the - * next draw - */ - assert(brw_miptree_match_image(brw_obj->mt, &brw_image->base.Base)); - } - } - - brw_obj->validated_first_level = validate_first_level; - brw_obj->validated_last_level = validate_last_level; - brw_obj->_Format = firstImage->base.Base.TexFormat, - brw_obj->needs_validate = false; -} - -/** - * Finalizes all textures, completing any rendering that needs to be done - * to prepare them. - */ -void -brw_validate_textures(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - const int max_enabled_unit = ctx->Texture._MaxEnabledTexImageUnit; - - for (int unit = 0; unit <= max_enabled_unit; unit++) { - struct gl_texture_object *tex_obj = ctx->Texture.Unit[unit]._Current; - - if (!tex_obj) - continue; - - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); - - /* We know that this is true by now, and if it wasn't, we might have - * mismatched level sizes and the copies would fail. - */ - assert(tex_obj->_BaseComplete); - - brw_update_max_level(tex_obj, sampler); - brw_finalize_mipmap_tree(brw, tex_obj); - } -} diff --git a/src/mesa/drivers/dri/i965/brw_upload.c b/src/mesa/drivers/dri/i965/brw_upload.c deleted file mode 100644 index 8f7acf0..0000000 --- a/src/mesa/drivers/dri/i965/brw_upload.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright 2003 VMware, Inc. 
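The switch above recovers the level-0 dimensions from whichever image happens to sit at BaseLevel by shifting the stored sizes back up by the level index. A worked example of that back-computation for a GL_TEXTURE_2D (the numbers are made up):

#include <assert.h>

int main(void)
{
   /* Suppose BaseLevel == 2 and that image is 16x8 texels. */
   unsigned level = 2, width = 16, height = 8;

   /* GL_TEXTURE_2D case: both dimensions scale with the level. */
   height <<= level;   /* 8  -> 32 */
   width  <<= level;   /* 16 -> 64 */

   assert(width == 64 && height == 32);
   return 0;
}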
- * Copyright © 2007 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * @file intel_upload.c - * - * Batched upload via BOs. - */ - -#include "main/macros.h" -#include "brw_bufmgr.h" -#include "brw_context.h" -#include "brw_buffer_objects.h" - -void -brw_upload_finish(struct brw_uploader *upload) -{ - assert((upload->bo == NULL) == (upload->map == NULL)); - if (!upload->bo) - return; - - brw_bo_unmap(upload->bo); - brw_bo_unreference(upload->bo); - upload->bo = NULL; - upload->map = NULL; - upload->next_offset = 0; -} - -/** - * Interface for getting memory for uploading streamed data to the GPU - * - * In most cases, streamed data (for GPU state structures, for example) is - * uploaded through brw_state_batch(), since that interface allows relocations - * from the streamed space returned to other BOs. However, that interface has - * the restriction that the amount of space allocated has to be "small". - * - * This interface, on the other hand, is able to handle arbitrary sized - * allocation requests, though it will batch small allocations into the same - * BO for efficiency and reduced memory footprint. - * - * \note The returned pointer is valid only until brw_upload_finish(). - * - * \param out_bo Pointer to a BO, which must point to a valid BO or NULL on - * entry, and will have a reference to the new BO containing the state on - * return. - * - * \param out_offset Offset within the buffer object that the data will land. - */ -void * -brw_upload_space(struct brw_uploader *upload, - uint32_t size, - uint32_t alignment, - struct brw_bo **out_bo, - uint32_t *out_offset) -{ - uint32_t offset; - - offset = ALIGN_NPOT(upload->next_offset, alignment); - if (upload->bo && offset + size > upload->bo->size) { - brw_upload_finish(upload); - offset = 0; - } - - assert((upload->bo == NULL) == (upload->map == NULL)); - if (!upload->bo) { - upload->bo = brw_bo_alloc(upload->bufmgr, "streamed data", - MAX2(upload->default_size, size), - BRW_MEMZONE_OTHER); - upload->map = brw_bo_map(NULL, upload->bo, - MAP_READ | MAP_WRITE | - MAP_PERSISTENT | MAP_ASYNC); - } - - upload->next_offset = offset + size; - - *out_offset = offset; - if (*out_bo != upload->bo) { - brw_bo_unreference(*out_bo); - *out_bo = upload->bo; - brw_bo_reference(upload->bo); - } - - return upload->map + offset; -} - -/** - * Handy interface to upload some data to temporary GPU memory quickly. 
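The uploader deleted in this file sub-allocates aligned chunks out of one BO and replaces the BO when it fills up. A toy CPU-only mirror of that batching logic, runnable without any driver types (buffer sizes and the 4096-byte default are illustrative, standing in for upload->default_size):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct toy_uploader {
   uint8_t *buf;
   uint32_t size, next;
};

static void *
toy_upload_space(struct toy_uploader *u, uint32_t sz, uint32_t align,
                 uint32_t *out_off)
{
   /* Non-power-of-two-safe alignment, like ALIGN_NPOT above. */
   uint32_t off = (u->next + align - 1) / align * align;

   if (u->buf && off + sz > u->size) {   /* "flush": start a new buffer */
      free(u->buf);
      u->buf = NULL;
   }
   if (!u->buf) {
      u->size = sz > 4096 ? sz : 4096;   /* MAX2(default_size, size) */
      u->buf = malloc(u->size);
      assert(u->buf);
      off = 0;
   }
   u->next = off + sz;
   *out_off = off;
   return u->buf + off;
}

int main(void)
{
   struct toy_uploader u = {0};
   uint32_t off;
   float consts[4] = {0, 1, 2, 3};
   memcpy(toy_upload_space(&u, sizeof(consts), 64, &off), consts,
          sizeof(consts));
   assert(off % 64 == 0);
   free(u.buf);
   return 0;
}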
- * - * References to this memory should not be retained across batch flushes. - */ -void -brw_upload_data(struct brw_uploader *upload, - const void *data, - uint32_t size, - uint32_t alignment, - struct brw_bo **out_bo, - uint32_t *out_offset) -{ - void *dst = brw_upload_space(upload, size, alignment, out_bo, out_offset); - memcpy(dst, data, size); -} - -void -brw_upload_init(struct brw_uploader *upload, - struct brw_bufmgr *bufmgr, - unsigned default_size) -{ - upload->bufmgr = bufmgr; - upload->bo = NULL; - upload->map = NULL; - upload->next_offset = 0; - upload->default_size = default_size; -} diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c deleted file mode 100644 index 7f9b4cc..0000000 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ /dev/null @@ -1,268 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - - -#include "brw_batch.h" -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" - -#define VS 0 -#define GS 1 -#define CLP 2 -#define SF 3 -#define CS 4 - -/** @file brw_urb.c - * - * Manages the division of the URB space between the various fixed-function - * units. - * - * See the Thread Initiation Management section of the GFX4 B-Spec, and - * the individual *_STATE structures for restrictions on numbers of - * entries and threads. - */ - -/* - * Generally, a unit requires a min_nr_entries based on how many entries - * it produces before the downstream unit gets unblocked and can use and - * dereference some of its handles. - * - * The SF unit preallocates a PUE at the start of thread dispatch, and only - * uses that one. So it requires one entry per thread. - * - * For CLIP, the SF unit will hold the previous primitive while the - * next is getting assembled, meaning that linestrips require 3 CLIP VUEs - * (vertices) to ensure continued processing, trifans require 4, and tristrips - * require 5. There can be 1 or 2 threads, and each has the same requirement. - * - * GS has the same requirement as CLIP, but it never handles tristrips, - * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces. - * We only run it single-threaded. - * - * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X). 
- * Each thread processes 2 preallocated VUEs (vertices) at a time, and they
- * get streamed down as soon as threads processing earlier vertices get
- * theirs accepted.
- *
- * Each unit will take the number of URB entries we give it (based on the
- * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
- * and brw_curbe.c for the CURBEs) and decide, in brw_*_state.c, the
- * maximum number of threads it can support based on that.
- *
- * XXX: Are the min_entry_size numbers useful?
- * XXX: Verify min_nr_entries, esp for VS.
- * XXX: Verify SF min_entry_size.
- */
-static const struct {
-   GLuint min_nr_entries;
-   GLuint preferred_nr_entries;
-   GLuint min_entry_size;
-   GLuint max_entry_size;
-} limits[CS+1] = {
-   { 16, 32, 1, 5 },   /* vs */
-   { 4, 8, 1, 5 },     /* gs */
-   { 5, 10, 1, 5 },    /* clp */
-   { 1, 8, 1, 12 },    /* sf */
-   { 1, 4, 1, 32 }     /* cs */
-};
-
-
-static bool check_urb_layout(struct brw_context *brw)
-{
-   brw->urb.vs_start = 0;
-   brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
-   brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize;
-   brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
-   brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
-
-   return brw->urb.cs_start + brw->urb.nr_cs_entries *
-      brw->urb.csize <= brw->urb.size;
-}
-
-/* Minimal update; forces re-emit of the URB fence packet after the GS
- * unit is turned on or off.
- */
-void
-brw_calculate_urb_fence(struct brw_context *brw, unsigned csize,
-                        unsigned vsize, unsigned sfsize)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   if (csize < limits[CS].min_entry_size)
-      csize = limits[CS].min_entry_size;
-
-   if (vsize < limits[VS].min_entry_size)
-      vsize = limits[VS].min_entry_size;
-
-   if (sfsize < limits[SF].min_entry_size)
-      sfsize = limits[SF].min_entry_size;
-
-   if (brw->urb.vsize < vsize ||
-       brw->urb.sfsize < sfsize ||
-       brw->urb.csize < csize ||
-       (brw->urb.constrained && (brw->urb.vsize > vsize ||
-                                 brw->urb.sfsize > sfsize ||
-                                 brw->urb.csize > csize))) {
-
-      brw->urb.csize = csize;
-      brw->urb.sfsize = sfsize;
-      brw->urb.vsize = vsize;
-
-      brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
-      brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;
-      brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
-      brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
-      brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;
-
-      brw->urb.constrained = 0;
-
-      if (devinfo->ver == 5) {
-         brw->urb.nr_vs_entries = 128;
-         brw->urb.nr_sf_entries = 48;
-         if (check_urb_layout(brw)) {
-            goto done;
-         } else {
-            brw->urb.constrained = 1;
-            brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
-            brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
-         }
-      } else if (devinfo->verx10 == 45) {
-         brw->urb.nr_vs_entries = 64;
-         if (check_urb_layout(brw)) {
-            goto done;
-         } else {
-            brw->urb.constrained = 1;
-            brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
-         }
-      }
-
-      if (!check_urb_layout(brw)) {
-         brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
-         brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
-         brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
-         brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
-         brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
-
-         /* Mark us as operating with constrained nr_entries, so that next
-          * time we recalculate we'll resize the fences in the hope of
-          * escaping constrained mode and getting back to normal
performance. - */ - brw->urb.constrained = 1; - - if (!check_urb_layout(brw)) { - /* This is impossible, given the maximal sizes of urb - * entries and the values for minimum nr of entries - * provided above. - */ - fprintf(stderr, "couldn't calculate URB layout!\n"); - exit(1); - } - - if (INTEL_DEBUG(DEBUG_URB|DEBUG_PERF)) - fprintf(stderr, "URB CONSTRAINED\n"); - } - -done: - if (INTEL_DEBUG(DEBUG_URB)) - fprintf(stderr, - "URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", - brw->urb.vs_start, - brw->urb.gs_start, - brw->urb.clip_start, - brw->urb.sf_start, - brw->urb.cs_start, - brw->urb.size); - - brw->ctx.NewDriverState |= BRW_NEW_URB_FENCE; - } -} - -static void recalculate_urb_fence( struct brw_context *brw ) -{ - brw_calculate_urb_fence(brw, brw->curbe.total_size, - brw_vue_prog_data(brw->vs.base.prog_data)->urb_entry_size, - brw->sf.prog_data->urb_entry_size); -} - - -const struct brw_tracked_state brw_recalculate_urb_fence = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BLORP | - BRW_NEW_PUSH_CONSTANT_ALLOCATION | - BRW_NEW_SF_PROG_DATA | - BRW_NEW_VS_PROG_DATA, - }, - .emit = recalculate_urb_fence -}; - - - - - -void brw_upload_urb_fence(struct brw_context *brw) -{ - struct brw_urb_fence uf; - memset(&uf, 0, sizeof(uf)); - - uf.header.opcode = CMD_URB_FENCE; - uf.header.length = sizeof(uf)/4-2; - uf.header.vs_realloc = 1; - uf.header.gs_realloc = 1; - uf.header.clp_realloc = 1; - uf.header.sf_realloc = 1; - uf.header.vfe_realloc = 1; - uf.header.cs_realloc = 1; - - /* The ordering below is correct, not the layout in the - * instruction. - * - * There are 256/384 urb reg pairs in total. - */ - uf.bits0.vs_fence = brw->urb.gs_start; - uf.bits0.gs_fence = brw->urb.clip_start; - uf.bits0.clp_fence = brw->urb.sf_start; - uf.bits1.sf_fence = brw->urb.cs_start; - uf.bits1.cs_fence = brw->urb.size; - - /* erratum: URB_FENCE must not cross a 64byte cacheline */ - if ((USED_BATCH(brw->batch) & 15) > 12) { - int pad = 16 - (USED_BATCH(brw->batch) & 15); - do - *brw->batch.map_next++ = MI_NOOP; - while (--pad); - } - - brw_batch_data(brw, &uf, sizeof(uf)); -} diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c deleted file mode 100644 index 90aff43..0000000 --- a/src/mesa/drivers/dri/i965/brw_util.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
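brw_upload_urb_fence() above pads with MI_NOOPs so the 3-dword URB_FENCE packet never crosses a 64-byte (16-dword) cacheline, per the erratum. The arithmetic, as a standalone check mirroring the `(used & 15) > 12` test:

#include <assert.h>

/* Dwords of MI_NOOP padding needed before a 3-dword URB_FENCE so it
 * stays inside one 16-dword cacheline. */
static unsigned
urb_fence_pad(unsigned used_dwords)
{
   return (used_dwords & 15) > 12 ? 16 - (used_dwords & 15) : 0;
}

int main(void)
{
   assert(urb_fence_pad(28) == 0);  /* 28 & 15 == 12: fence fits exactly */
   assert(urb_fence_pad(30) == 2);  /* 30 & 15 == 14: pad to the next line */
   assert(urb_fence_pad(32) == 0);  /* already cacheline-aligned */
   return 0;
}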
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_util.h" -#include "brw_defines.h" -#include "compiler/brw_eu_defines.h" - -GLuint brw_translate_blend_equation( GLenum mode ) -{ - switch (mode) { - case GL_FUNC_ADD: - return BRW_BLENDFUNCTION_ADD; - case GL_MIN: - return BRW_BLENDFUNCTION_MIN; - case GL_MAX: - return BRW_BLENDFUNCTION_MAX; - case GL_FUNC_SUBTRACT: - return BRW_BLENDFUNCTION_SUBTRACT; - case GL_FUNC_REVERSE_SUBTRACT: - return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; - default: - unreachable("not reached"); - } -} - -GLuint brw_translate_blend_factor( GLenum factor ) -{ - switch(factor) { - case GL_ZERO: - return BRW_BLENDFACTOR_ZERO; - case GL_SRC_ALPHA: - return BRW_BLENDFACTOR_SRC_ALPHA; - case GL_ONE: - return BRW_BLENDFACTOR_ONE; - case GL_SRC_COLOR: - return BRW_BLENDFACTOR_SRC_COLOR; - case GL_ONE_MINUS_SRC_COLOR: - return BRW_BLENDFACTOR_INV_SRC_COLOR; - case GL_DST_COLOR: - return BRW_BLENDFACTOR_DST_COLOR; - case GL_ONE_MINUS_DST_COLOR: - return BRW_BLENDFACTOR_INV_DST_COLOR; - case GL_ONE_MINUS_SRC_ALPHA: - return BRW_BLENDFACTOR_INV_SRC_ALPHA; - case GL_DST_ALPHA: - return BRW_BLENDFACTOR_DST_ALPHA; - case GL_ONE_MINUS_DST_ALPHA: - return BRW_BLENDFACTOR_INV_DST_ALPHA; - case GL_SRC_ALPHA_SATURATE: - return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; - case GL_CONSTANT_COLOR: - return BRW_BLENDFACTOR_CONST_COLOR; - case GL_ONE_MINUS_CONSTANT_COLOR: - return BRW_BLENDFACTOR_INV_CONST_COLOR; - case GL_CONSTANT_ALPHA: - return BRW_BLENDFACTOR_CONST_ALPHA; - case GL_ONE_MINUS_CONSTANT_ALPHA: - return BRW_BLENDFACTOR_INV_CONST_ALPHA; - - case GL_SRC1_COLOR: - return BRW_BLENDFACTOR_SRC1_COLOR; - case GL_SRC1_ALPHA: - return BRW_BLENDFACTOR_SRC1_ALPHA; - case GL_ONE_MINUS_SRC1_COLOR: - return BRW_BLENDFACTOR_INV_SRC1_COLOR; - case GL_ONE_MINUS_SRC1_ALPHA: - return BRW_BLENDFACTOR_INV_SRC1_ALPHA; - - default: - unreachable("not reached"); - } -} - -static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = { - [GL_POINTS] =_3DPRIM_POINTLIST, - [GL_LINES] = _3DPRIM_LINELIST, - [GL_LINE_LOOP] = _3DPRIM_LINELOOP, - [GL_LINE_STRIP] = _3DPRIM_LINESTRIP, - [GL_TRIANGLES] = _3DPRIM_TRILIST, - [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, - [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN, - [GL_QUADS] = _3DPRIM_QUADLIST, - [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP, - [GL_POLYGON] = _3DPRIM_POLYGON, - [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ, - [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, - [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ, - [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, -}; - -uint32_t -get_hw_prim_for_gl_prim(int mode) -{ - assert(mode < ARRAY_SIZE(prim_to_hw_prim)); - return prim_to_hw_prim[mode]; -} diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h deleted file mode 100644 index 095c43a..0000000 --- a/src/mesa/drivers/dri/i965/brw_util.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. 
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_UTIL_H -#define BRW_UTIL_H - -#include "brw_context.h" -#include "main/framebuffer.h" - -extern GLuint brw_translate_blend_factor( GLenum factor ); -extern GLuint brw_translate_blend_equation( GLenum mode ); - -static inline float -brw_get_line_width(struct brw_context *brw) -{ - /* From the OpenGL 4.4 spec: - * - * "The actual width of non-antialiased lines is determined by rounding - * the supplied width to the nearest integer, then clamping it to the - * implementation-dependent maximum non-antialiased line width." - */ - float line_width = - CLAMP(!_mesa_is_multisample_enabled(&brw->ctx) && !brw->ctx.Line.SmoothFlag - ? roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width, - 0.125f, brw->ctx.Const.MaxLineWidth); - - if (!_mesa_is_multisample_enabled(&brw->ctx) && brw->ctx.Line.SmoothFlag && line_width < 1.5f) { - /* For 1 pixel line thickness or less, the general - * anti-aliasing algorithm gives up, and a garbage line is - * generated. Setting a Line Width of 0.0 specifies the - * rasterization of the "thinnest" (one-pixel-wide), - * non-antialiased lines. - * - * Lines rendered with zero Line Width are rasterized using - * Grid Intersection Quantization rules as specified by - * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line - * Rasterization. - */ - line_width = 0.0f; - } - - return line_width; -} - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c deleted file mode 100644 index 1d22c0d..0000000 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ /dev/null @@ -1,369 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. 
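brw_get_line_width() above rounds, clamps, and in the thin-smooth case zeroes the width. Two worked cases under the rules quoted there (the values are made up, multisampling is assumed off, and 7.0 stands in for MaxLineWidth):

#include <assert.h>
#include <math.h>

/* Mirror of the width rule above for the non-multisampled case:
 * round non-smooth widths, clamp to 0.125..max, zero thin smooth lines. */
static float
line_width(float w, float max_w, int smooth)
{
   float lw = smooth ? w : roundf(w);
   lw = fminf(fmaxf(lw, 0.125f), max_w);
   if (smooth && lw < 1.5f)
      lw = 0.0f;   /* fall back to GIQ "cosmetic" line rasterization */
   return lw;
}

int main(void)
{
   assert(line_width(3.4f, 7.0f, 0) == 3.0f); /* rounded, in range */
   assert(line_width(1.2f, 7.0f, 1) == 0.0f); /* smooth + thin -> zero */
   return 0;
}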
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "util/compiler.h" -#include "main/context.h" -#include "brw_context.h" -#include "brw_vs.h" -#include "brw_util.h" -#include "brw_state.h" -#include "program/prog_print.h" -#include "program/prog_parameter.h" -#include "compiler/brw_nir.h" -#include "brw_program.h" - -#include "util/ralloc.h" - -/** - * Decide which set of clip planes should be used when clipping via - * gl_Position or gl_ClipVertex. - */ -gl_clip_plane * -brw_select_clip_planes(struct gl_context *ctx) -{ - if (ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]) { - /* There is currently a GLSL vertex shader, so clip according to GLSL - * rules, which means compare gl_ClipVertex (or gl_Position, if - * gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes - * that were stored in EyeUserPlane at the time the clip planes were - * specified. - */ - return ctx->Transform.EyeUserPlane; - } else { - /* Either we are using fixed function or an ARB vertex program. In - * either case the clip planes are going to be compared against - * gl_Position (which is in clip coordinates) so we have to clip using - * _ClipUserPlane, which was transformed into clip coordinates by Mesa - * core. - */ - return ctx->Transform._ClipUserPlane; - } -} - -static GLbitfield64 -brw_vs_outputs_written(struct brw_context *brw, struct brw_vs_prog_key *key, - GLbitfield64 user_varyings) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - GLbitfield64 outputs_written = user_varyings; - - if (devinfo->ver < 6) { - /* Put dummy slots into the VUE for the SF to put the replaced - * point sprite coords in. We shouldn't need these dummy slots, - * which take up precious URB space, but it would mean that the SF - * doesn't get nice aligned pairs of input coords into output - * coords, which would be a pain to handle. - */ - for (unsigned i = 0; i < 8; i++) { - if (key->point_coord_replace & (1 << i)) - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i); - } - - /* if back colors are written, allocate slots for front colors too */ - if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0)) - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0); - if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1)) - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1); - } - - /* In order for legacy clipping to work, we need to populate the clip - * distance varying slots whenever clipping is enabled, even if the vertex - * shader doesn't write to gl_ClipDistance. 
- */ - if (key->nr_userclip_plane_consts > 0) { - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0); - outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); - } - - return outputs_written; -} - -static bool -brw_codegen_vs_prog(struct brw_context *brw, - struct brw_program *vp, - struct brw_vs_prog_key *key) -{ - const struct brw_compiler *compiler = brw->screen->compiler; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const GLuint *program; - struct brw_vs_prog_data prog_data; - struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base; - void *mem_ctx; - bool start_busy = false; - double start_time = 0; - - memset(&prog_data, 0, sizeof(prog_data)); - - /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */ - if (vp->program.info.is_arb_asm) - stage_prog_data->use_alt_mode = true; - - mem_ctx = ralloc_context(NULL); - - nir_shader *nir = nir_shader_clone(mem_ctx, vp->program.nir); - - brw_assign_common_binding_table_offsets(devinfo, &vp->program, - &prog_data.base.base, 0); - - if (!vp->program.info.is_arb_asm) { - brw_nir_setup_glsl_uniforms(mem_ctx, nir, &vp->program, - &prog_data.base.base, - compiler->scalar_stage[MESA_SHADER_VERTEX]); - if (brw->can_push_ubos) { - brw_nir_analyze_ubo_ranges(compiler, nir, key, - prog_data.base.base.ubo_ranges); - } - } else { - brw_nir_setup_arb_uniforms(mem_ctx, nir, &vp->program, - &prog_data.base.base); - } - - if (key->nr_userclip_plane_consts > 0) { - brw_nir_lower_legacy_clipping(nir, key->nr_userclip_plane_consts, - &prog_data.base.base); - } - - if (key->copy_edgeflag) - nir_lower_passthrough_edgeflags(nir); - - uint64_t outputs_written = - brw_vs_outputs_written(brw, key, nir->info.outputs_written); - - brw_compute_vue_map(devinfo, - &prog_data.base.vue_map, outputs_written, - nir->info.separate_shader, 1); - - if (0) { - _mesa_fprint_program_opt(stderr, &vp->program, PROG_PRINT_DEBUG, true); - } - - if (unlikely(brw->perf_debug)) { - start_busy = (brw->batch.last_bo && - brw_bo_busy(brw->batch.last_bo)); - start_time = get_time(); - } - - if (INTEL_DEBUG(DEBUG_VS)) { - if (vp->program.info.is_arb_asm) - brw_dump_arb_asm("vertex", &vp->program); - } - - - /* Emit GFX4 code. - */ - struct brw_compile_vs_params params = { - .nir = nir, - .key = key, - .prog_data = &prog_data, - .log_data = brw, - }; - - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) { - params.shader_time = true; - params.shader_time_index = - brw_get_shader_time_index(brw, &vp->program, ST_VS, - !vp->program.info.is_arb_asm); - } - - program = brw_compile_vs(compiler, mem_ctx, ¶ms); - if (program == NULL) { - if (!vp->program.info.is_arb_asm) { - vp->program.sh.data->LinkStatus = LINKING_FAILURE; - ralloc_strcat(&vp->program.sh.data->InfoLog, params.error_str); - } - - _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", params.error_str); - - ralloc_free(mem_ctx); - return false; - } - - if (unlikely(brw->perf_debug)) { - if (vp->compiled_once) { - brw_debug_recompile(brw, MESA_SHADER_VERTEX, vp->program.Id, - &key->base); - } - if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { - perf_debug("VS compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - vp->compiled_once = true; - } - - /* Scratch space is used for register spilling */ - brw_alloc_stage_scratch(brw, &brw->vs.base, - prog_data.base.base.total_scratch); - - /* The param and pull_param arrays will be freed by the shader cache. 
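The nr_userclip_plane_consts count consumed above is derived later in this file as util_logbase2(ctx->Transform.ClipPlanesEnabled) + 1, i.e. one more than the index of the highest enabled plane, so the uploaded constants cover every enabled plane even with gaps in the mask. A standalone check of that arithmetic, with a plain log2 helper standing in for util_logbase2:

#include <assert.h>

/* Index of the highest set bit; same contract as util_logbase2() for
 * nonzero inputs. */
static unsigned
log2_floor(unsigned v)
{
   unsigned r = 0;
   while (v >>= 1)
      r++;
   return r;
}

int main(void)
{
   /* Planes 0 and 2 enabled: must upload consts for planes 0..2. */
   unsigned enabled = (1u << 0) | (1u << 2);
   assert(log2_floor(enabled) + 1 == 3);
   return 0;
}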
*/ - ralloc_steal(NULL, prog_data.base.base.param); - ralloc_steal(NULL, prog_data.base.base.pull_param); - brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG, - key, sizeof(struct brw_vs_prog_key), - program, prog_data.base.base.program_size, - &prog_data, sizeof(prog_data), - &brw->vs.base.prog_offset, &brw->vs.base.prog_data); - ralloc_free(mem_ctx); - - return true; -} - -static bool -brw_vs_state_dirty(const struct brw_context *brw) -{ - return brw_state_dirty(brw, - _NEW_BUFFERS | - _NEW_LIGHT | - _NEW_POINT | - _NEW_POLYGON | - _NEW_TEXTURE | - _NEW_TRANSFORM, - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_VS_ATTRIB_WORKAROUNDS); -} - -void -brw_vs_populate_key(struct brw_context *brw, - struct brw_vs_prog_key *key) -{ - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_VERTEX_PROGRAM */ - struct gl_program *prog = brw->programs[MESA_SHADER_VERTEX]; - struct brw_program *vp = (struct brw_program *) prog; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - memset(key, 0, sizeof(*key)); - - /* Just upload the program verbatim for now. Always send it all - * the inputs it asks for, whether they are varying or not. - */ - - /* _NEW_TEXTURE */ - brw_populate_base_prog_key(ctx, vp, &key->base); - - if (ctx->Transform.ClipPlanesEnabled != 0 && - (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES) && - vp->program.info.clip_distance_array_size == 0) { - key->nr_userclip_plane_consts = - util_logbase2(ctx->Transform.ClipPlanesEnabled) + 1; - } - - if (devinfo->ver < 6) { - /* _NEW_POLYGON */ - key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL); - - /* _NEW_POINT */ - if (ctx->Point.PointSprite) { - key->point_coord_replace = ctx->Point.CoordReplace & 0xff; - } - } - - if (prog->info.outputs_written & - (VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 | - VARYING_BIT_BFC1)) { - /* _NEW_LIGHT | _NEW_BUFFERS */ - key->clamp_vertex_color = ctx->Light._ClampVertexColor; - } - - /* BRW_NEW_VS_ATTRIB_WORKAROUNDS */ - if (devinfo->verx10 <= 70) { - memcpy(key->gl_attrib_wa_flags, brw->vb.attrib_wa_flags, - sizeof(brw->vb.attrib_wa_flags)); - } -} - -void -brw_upload_vs_prog(struct brw_context *brw) -{ - struct brw_vs_prog_key key; - /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_program *vp = - (struct brw_program *) brw->programs[MESA_SHADER_VERTEX]; - - if (!brw_vs_state_dirty(brw)) - return; - - brw_vs_populate_key(brw, &key); - - if (brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG, &key, sizeof(key), - &brw->vs.base.prog_offset, &brw->vs.base.prog_data, - true)) - return; - - if (brw_disk_cache_upload_program(brw, MESA_SHADER_VERTEX)) - return; - - vp = (struct brw_program *) brw->programs[MESA_SHADER_VERTEX]; - vp->id = key.base.program_string_id; - - ASSERTED bool success = brw_codegen_vs_prog(brw, vp, &key); - assert(success); -} - -void -brw_vs_populate_default_key(const struct brw_compiler *compiler, - struct brw_vs_prog_key *key, - struct gl_program *prog) -{ - const struct intel_device_info *devinfo = compiler->devinfo; - struct brw_program *bvp = brw_program(prog); - - memset(key, 0, sizeof(*key)); - - brw_populate_default_base_prog_key(devinfo, bvp, &key->base); - - key->clamp_vertex_color = - (prog->info.outputs_written & - (VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 | - VARYING_BIT_BFC1)); -} - -bool -brw_vs_precompile(struct gl_context *ctx, struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_vs_prog_key key; - uint32_t old_prog_offset = brw->vs.base.prog_offset; - struct 
brw_stage_prog_data *old_prog_data = brw->vs.base.prog_data; - bool success; - - struct brw_program *bvp = brw_program(prog); - - brw_vs_populate_default_key(brw->screen->compiler, &key, prog); - - success = brw_codegen_vs_prog(brw, bvp, &key); - - brw->vs.base.prog_offset = old_prog_offset; - brw->vs.base.prog_data = old_prog_data; - - return success; -} diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h deleted file mode 100644 index 207853c..0000000 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_VS_H -#define BRW_VS_H - -#include "brw_context.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void -brw_upload_vs_prog(struct brw_context *brw); - -void -brw_vs_populate_key(struct brw_context *brw, - struct brw_vs_prog_key *key); -void -brw_vs_populate_default_key(const struct brw_compiler *compiler, - struct brw_vs_prog_key *key, - struct gl_program *prog); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c deleted file mode 100644 index 6fcf9c5..0000000 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "main/mtypes.h" -#include "program/prog_parameter.h" -#include "main/shaderapi.h" - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_buffer_objects.h" - - -/* Creates a new VS constant buffer reflecting the current VS program's - * constants, if needed by the VS program. - * - * Otherwise, constants go through the CURBEs using the brw_constant_buffer - * state atom. - */ -static void -brw_upload_vs_pull_constants(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->vs.base; - - /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_program *vp = - (struct brw_program *) brw->programs[MESA_SHADER_VERTEX]; - - /* BRW_NEW_VS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data; - - _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_VERTEX); - /* _NEW_PROGRAM_CONSTANTS */ - brw_upload_pull_constants(brw, BRW_NEW_VS_CONSTBUF, &vp->program, - stage_state, prog_data); -} - -const struct brw_tracked_state brw_vs_pull_constants = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_VS_PROG_DATA, - }, - .emit = brw_upload_vs_pull_constants, -}; - -static void -brw_upload_vs_ubo_surfaces(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* _NEW_PROGRAM */ - struct gl_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; - - /* BRW_NEW_VS_PROG_DATA */ - brw_upload_ubo_surfaces(brw, prog, &brw->vs.base, brw->vs.base.prog_data); -} - -const struct brw_tracked_state brw_vs_ubo_surfaces = { - .dirty = { - .mesa = _NEW_PROGRAM, - .brw = BRW_NEW_BATCH | - BRW_NEW_UNIFORM_BUFFER | - BRW_NEW_VS_PROG_DATA, - }, - .emit = brw_upload_vs_ubo_surfaces, -}; - -static void -brw_upload_vs_image_surfaces(struct brw_context *brw) -{ - /* BRW_NEW_VERTEX_PROGRAM */ - const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX]; - - if (vp) { - /* BRW_NEW_VS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */ - brw_upload_image_surfaces(brw, vp, &brw->vs.base, - brw->vs.base.prog_data); - } -} - -const struct brw_tracked_state brw_vs_image_surfaces = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_AUX_STATE | - BRW_NEW_IMAGE_UNITS | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_VS_PROG_DATA, - }, - .emit = brw_upload_vs_image_surfaces, -}; diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c deleted file mode 100644 index 5b43093..0000000 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ /dev/null @@ -1,639 +0,0 @@ -/* - * Copyright (C) Intel Corp. 2006. All Rights Reserved. - * Intel funded Tungsten Graphics to - * develop this 3D driver. 
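The brw_tracked_state atoms deleted above (VS pull constants, UBO surfaces, image surfaces) all follow one pattern: a pair of dirty-bit masks plus an emit callback that the state-upload loop runs when any listed bit is set. A simplified sketch of that dispatch, using invented flag names and a toy context type rather than the driver's real ones:

    #include <stdint.h>
    #include <stdio.h>

    /* Invented dirty-bit values, for illustration only. */
    #define EX_NEW_PROGRAM (1u << 0)
    #define EX_NEW_BATCH   (1u << 1)

    struct ex_context { uint32_t mesa_dirty; uint32_t brw_dirty; };

    struct ex_tracked_state {
       struct { uint32_t mesa; uint32_t brw; } dirty;
       void (*emit)(struct ex_context *ctx);
    };

    static void emit_ubo_surfaces(struct ex_context *ctx)
    { (void)ctx; printf("re-emitting UBO surface states\n"); }

    static const struct ex_tracked_state ex_ubo_surfaces = {
       .dirty = { .mesa = EX_NEW_PROGRAM, .brw = EX_NEW_BATCH },
       .emit = emit_ubo_surfaces,
    };

    /* The upload loop runs an atom only when one of its dirty bits is set. */
    static void
    run_atom(struct ex_context *ctx, const struct ex_tracked_state *atom)
    {
       if ((ctx->mesa_dirty & atom->dirty.mesa) ||
           (ctx->brw_dirty & atom->dirty.brw))
          atom->emit(ctx);
    }

    int main(void)
    {
       struct ex_context ctx = { .mesa_dirty = EX_NEW_PROGRAM, .brw_dirty = 0 };
       run_atom(&ctx, &ex_ubo_surfaces);   /* fires: the mesa bit matches */
       return 0;
    }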
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include "brw_context.h" -#include "brw_wm.h" -#include "brw_state.h" -#include "main/enums.h" -#include "main/formats.h" -#include "main/fbobject.h" -#include "main/samplerobj.h" -#include "main/framebuffer.h" -#include "program/prog_parameter.h" -#include "program/program.h" -#include "brw_mipmap_tree.h" -#include "brw_image.h" -#include "brw_fbo.h" -#include "compiler/brw_nir.h" -#include "brw_program.h" - -#include "util/ralloc.h" -#include "util/u_math.h" - -static void -assign_fs_binding_table_offsets(const struct intel_device_info *devinfo, - const struct gl_program *prog, - const struct brw_wm_prog_key *key, - struct brw_wm_prog_data *prog_data) -{ - /* Render targets implicitly start at surface index 0. Even if there are - * no color regions, we still perform an FB write to a null render target, - * which will be surface 0. - */ - uint32_t next_binding_table_offset = MAX2(key->nr_color_regions, 1); - - next_binding_table_offset = - brw_assign_common_binding_table_offsets(devinfo, prog, &prog_data->base, - next_binding_table_offset); - - if (prog->nir->info.outputs_read && !key->coherent_fb_fetch) { - prog_data->binding_table.render_target_read_start = - next_binding_table_offset; - next_binding_table_offset += key->nr_color_regions; - } - - /* Update the binding table size */ - prog_data->base.binding_table.size_bytes = next_binding_table_offset * 4; -} - -static bool -brw_codegen_wm_prog(struct brw_context *brw, - struct brw_program *fp, - struct brw_wm_prog_key *key, - struct brw_vue_map *vue_map) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - void *mem_ctx = ralloc_context(NULL); - struct brw_wm_prog_data prog_data; - const GLuint *program; - bool start_busy = false; - double start_time = 0; - - nir_shader *nir = nir_shader_clone(mem_ctx, fp->program.nir); - - memset(&prog_data, 0, sizeof(prog_data)); - - /* Use ALT floating point mode for ARB programs so that 0^0 == 1. 
*/ - if (fp->program.info.is_arb_asm) - prog_data.base.use_alt_mode = true; - - assign_fs_binding_table_offsets(devinfo, &fp->program, key, &prog_data); - - if (!fp->program.info.is_arb_asm) { - brw_nir_setup_glsl_uniforms(mem_ctx, nir, &fp->program, - &prog_data.base, true); - if (brw->can_push_ubos) { - brw_nir_analyze_ubo_ranges(brw->screen->compiler, nir, - NULL, prog_data.base.ubo_ranges); - } - } else { - brw_nir_setup_arb_uniforms(mem_ctx, nir, &fp->program, &prog_data.base); - - if (INTEL_DEBUG(DEBUG_WM)) - brw_dump_arb_asm("fragment", &fp->program); - } - - if (unlikely(brw->perf_debug)) { - start_busy = (brw->batch.last_bo && - brw_bo_busy(brw->batch.last_bo)); - start_time = get_time(); - } - - struct brw_compile_fs_params params = { - .nir = nir, - .key = key, - .prog_data = &prog_data, - - .allow_spilling = true, - .vue_map = vue_map, - - .log_data = brw, - }; - - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) { - params.shader_time = true; - params.shader_time_index8 = - brw_get_shader_time_index(brw, &fp->program, ST_FS8, - !fp->program.info.is_arb_asm); - params.shader_time_index16 = - brw_get_shader_time_index(brw, &fp->program, ST_FS16, - !fp->program.info.is_arb_asm); - params.shader_time_index32 = - brw_get_shader_time_index(brw, &fp->program, ST_FS32, - !fp->program.info.is_arb_asm); - } - - program = brw_compile_fs(brw->screen->compiler, mem_ctx, &params); - - if (program == NULL) { - if (!fp->program.info.is_arb_asm) { - fp->program.sh.data->LinkStatus = LINKING_FAILURE; - ralloc_strcat(&fp->program.sh.data->InfoLog, params.error_str); - } - - _mesa_problem(NULL, "Failed to compile fragment shader: %s\n", params.error_str); - - ralloc_free(mem_ctx); - return false; - } - - if (unlikely(brw->perf_debug)) { - if (fp->compiled_once) { - brw_debug_recompile(brw, MESA_SHADER_FRAGMENT, fp->program.Id, - &key->base); - } - fp->compiled_once = true; - - if (start_busy && !brw_bo_busy(brw->batch.last_bo)) { - perf_debug("FS compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - } - - brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch); - - if (INTEL_DEBUG(DEBUG_WM) && fp->program.info.is_arb_asm) - fprintf(stderr, "\n"); - - /* The param and pull_param arrays will be freed by the shader cache. */ - ralloc_steal(NULL, prog_data.base.param); - ralloc_steal(NULL, prog_data.base.pull_param); - brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG, - key, sizeof(struct brw_wm_prog_key), - program, prog_data.base.program_size, - &prog_data, sizeof(prog_data), - &brw->wm.base.prog_offset, &brw->wm.base.prog_data); - - ralloc_free(mem_ctx); - - return true; -} - -static uint8_t -gfx6_gather_workaround(GLenum internalformat) -{ - switch (internalformat) { - case GL_R8I: return WA_SIGN | WA_8BIT; - case GL_R8UI: return WA_8BIT; - case GL_R16I: return WA_SIGN | WA_16BIT; - case GL_R16UI: return WA_16BIT; - default: - /* Note that even though GL_R32I and GL_R32UI have format overrides in - * the surface state, there is no shader w/a required.
- */ - return 0; - } -} - -static void -brw_populate_sampler_prog_key_data(struct gl_context *ctx, - const struct gl_program *prog, - struct brw_sampler_prog_key_data *key) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - GLbitfield mask = prog->SamplersUsed; - - while (mask) { - const int s = u_bit_scan(&mask); - - key->swizzles[s] = SWIZZLE_NOOP; - key->scale_factors[s] = 0.0f; - - int unit_id = prog->SamplerUnits[s]; - const struct gl_texture_unit *unit = &ctx->Texture.Unit[unit_id]; - - if (unit->_Current && unit->_Current->Target != GL_TEXTURE_BUFFER) { - const struct gl_texture_object *t = unit->_Current; - const struct gl_texture_image *img = t->Image[0][t->Attrib.BaseLevel]; - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit_id); - - const bool alpha_depth = t->Attrib.DepthMode == GL_ALPHA && - (img->_BaseFormat == GL_DEPTH_COMPONENT || - img->_BaseFormat == GL_DEPTH_STENCIL); - - /* Haswell handles texture swizzling as surface format overrides - * (except for GL_ALPHA); all other platforms need MOVs in the shader. - */ - if (alpha_depth || (devinfo->verx10 <= 70)) - key->swizzles[s] = brw_get_texture_swizzle(ctx, t); - - if (devinfo->ver < 8 && - sampler->Attrib.MinFilter != GL_NEAREST && - sampler->Attrib.MagFilter != GL_NEAREST) { - if (sampler->Attrib.WrapS == GL_CLAMP) - key->gl_clamp_mask[0] |= 1 << s; - if (sampler->Attrib.WrapT == GL_CLAMP) - key->gl_clamp_mask[1] |= 1 << s; - if (sampler->Attrib.WrapR == GL_CLAMP) - key->gl_clamp_mask[2] |= 1 << s; - } - - /* gather4 for RG32* is broken in multiple ways on Gfx7. */ - if (devinfo->ver == 7 && prog->info.uses_texture_gather) { - switch (img->InternalFormat) { - case GL_RG32I: - case GL_RG32UI: { - /* We have to override the format to R32G32_FLOAT_LD. - * This means that SCS_ALPHA and SCS_ONE will return 0x3f8 - * (1.0) rather than integer 1. This needs shader hacks. - * - * On Ivybridge, we whack W (alpha) to ONE in our key's - * swizzle. On Haswell, we look at the original texture - * swizzle, and use XYZW with channels overridden to ONE, - * leaving normal texture swizzling to SCS. - */ - unsigned src_swizzle = - devinfo->platform == INTEL_PLATFORM_HSW ? - t->Attrib._Swizzle : key->swizzles[s]; - for (int i = 0; i < 4; i++) { - unsigned src_comp = GET_SWZ(src_swizzle, i); - if (src_comp == SWIZZLE_ONE || src_comp == SWIZZLE_W) { - key->swizzles[i] &= ~(0x7 << (3 * i)); - key->swizzles[i] |= SWIZZLE_ONE << (3 * i); - } - } - } - FALLTHROUGH; - case GL_RG32F: - /* The channel select for green doesn't work - we have to - * request blue. Haswell can use SCS for this, but Ivybridge - * needs a shader workaround. - */ - if (devinfo->platform != INTEL_PLATFORM_HSW) - key->gather_channel_quirk_mask |= 1 << s; - break; - } - } - - /* Gfx6's gather4 is broken for UINT/SINT; we treat them as - * UNORM/FLOAT instead and fix it in the shader. - */ - if (devinfo->ver == 6 && prog->info.uses_texture_gather) { - key->gfx6_gather_wa[s] = gfx6_gather_workaround(img->InternalFormat); - } - - /* If this is a multisample sampler, and uses the CMS MSAA layout, - * then we need to emit slightly different code to first sample the - * MCS surface. - */ - struct brw_texture_object *intel_tex = - brw_texture_object((struct gl_texture_object *)t); - - /* From gfx9 onwards some single sampled buffers can also be - * compressed. These don't need ld2dms sampling along with mcs fetch. 
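To make the gfx6_gather_workaround() flags above concrete: the compiler uses them to emit fixup arithmetic after the gather, recovering the integer value from the UNORM result the overridden surface format produces. A standalone sketch of that fixup, assuming WA_SIGN/WA_8BIT/WA_16BIT have the bit values 1, 2, and 4; this illustrates the arithmetic, not the compiler's actual codegen:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed flag values, mirroring the deleted driver headers. */
    #define WA_SIGN  1
    #define WA_8BIT  2
    #define WA_16BIT 4

    static int32_t
    gather_fixup(float unorm_val, uint8_t wa)
    {
       if (!wa)
          return (int32_t)unorm_val;  /* 32-bit case: bits reinterpreted, no w/a */

       const int bits = (wa & WA_8BIT) ? 8 : 16;

       /* UNORM sampling returned i / (2^bits - 1); undo the normalization. */
       int32_t v = (int32_t)(unorm_val * (float)((1 << bits) - 1) + 0.5f);

       /* For SINT formats, sign-extend the low 'bits' bits. */
       if ((wa & WA_SIGN) && v >= (1 << (bits - 1)))
          v -= 1 << bits;

       return v;
    }

    int main(void)
    {
       /* 0xff sampled through R8_UNORM comes back as 1.0f; as R8I it is -1. */
       printf("%d\n", gather_fixup(1.0f, WA_SIGN | WA_8BIT));
       return 0;
    }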
- */ - if (intel_tex->mt->aux_usage == ISL_AUX_USAGE_MCS) { - assert(devinfo->ver >= 7); - assert(intel_tex->mt->surf.samples > 1); - assert(intel_tex->mt->aux_buf); - assert(intel_tex->mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY); - key->compressed_multisample_layout_mask |= 1 << s; - - if (intel_tex->mt->surf.samples >= 16) { - assert(devinfo->ver >= 9); - key->msaa_16 |= 1 << s; - } - } - - if (t->Target == GL_TEXTURE_EXTERNAL_OES && intel_tex->planar_format) { - - /* Setup possible scaling factor. */ - key->scale_factors[s] = intel_tex->planar_format->scaling_factor; - - switch (intel_tex->planar_format->components) { - case __DRI_IMAGE_COMPONENTS_Y_UV: - key->y_uv_image_mask |= 1 << s; - break; - case __DRI_IMAGE_COMPONENTS_Y_U_V: - key->y_u_v_image_mask |= 1 << s; - break; - case __DRI_IMAGE_COMPONENTS_Y_XUXV: - key->yx_xuxv_image_mask |= 1 << s; - break; - case __DRI_IMAGE_COMPONENTS_Y_UXVX: - key->xy_uxvx_image_mask |= 1 << s; - break; - case __DRI_IMAGE_COMPONENTS_AYUV: - key->ayuv_image_mask |= 1 << s; - break; - case __DRI_IMAGE_COMPONENTS_XYUV: - key->xyuv_image_mask |= 1 << s; - break; - default: - break; - } - - switch (intel_tex->yuv_color_space) { - case __DRI_YUV_COLOR_SPACE_ITU_REC709: - key->bt709_mask |= 1 << s; - break; - case __DRI_YUV_COLOR_SPACE_ITU_REC2020: - key->bt2020_mask |= 1 << s; - break; - default: - break; - } - } - - } - } -} - -void -brw_populate_base_prog_key(struct gl_context *ctx, - const struct brw_program *prog, - struct brw_base_prog_key *key) -{ - key->program_string_id = prog->id; - key->subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM; - brw_populate_sampler_prog_key_data(ctx, &prog->program, &key->tex); -} - -void -brw_populate_default_base_prog_key(const struct intel_device_info *devinfo, - const struct brw_program *prog, - struct brw_base_prog_key *key) -{ - key->program_string_id = prog->id; - key->subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM; - brw_setup_tex_for_precompile(devinfo, &key->tex, &prog->program); -} - -static bool -brw_wm_state_dirty(const struct brw_context *brw) -{ - return brw_state_dirty(brw, - _NEW_BUFFERS | - _NEW_COLOR | - _NEW_DEPTH | - _NEW_FRAG_CLAMP | - _NEW_HINT | - _NEW_LIGHT | - _NEW_LINE | - _NEW_MULTISAMPLE | - _NEW_POLYGON | - _NEW_STENCIL | - _NEW_TEXTURE, - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_REDUCED_PRIMITIVE | - BRW_NEW_STATS_WM | - BRW_NEW_VUE_MAP_GEOM_OUT); -} - -void -brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct gl_program *prog = brw->programs[MESA_SHADER_FRAGMENT]; - const struct brw_program *fp = brw_program_const(prog); - GLuint lookup = 0; - GLuint line_aa; - - memset(key, 0, sizeof(*key)); - - /* Build the index for table lookup - */ - if (devinfo->ver < 6) { - struct brw_renderbuffer *depth_irb = - brw_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); - - /* _NEW_COLOR */ - if (prog->info.fs.uses_discard || ctx->Color.AlphaEnabled) { - lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT; - } - - if (prog->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - lookup |= BRW_WM_IZ_PS_COMPUTES_DEPTH_BIT; - } - - /* _NEW_DEPTH */ - if (depth_irb && ctx->Depth.Test) { - lookup |= BRW_WM_IZ_DEPTH_TEST_ENABLE_BIT; - - if (brw_depth_writes_enabled(brw)) - lookup |= BRW_WM_IZ_DEPTH_WRITE_ENABLE_BIT; - } - - /* _NEW_STENCIL | _NEW_BUFFERS */ - if (brw->stencil_enabled) { - lookup |= BRW_WM_IZ_STENCIL_TEST_ENABLE_BIT; - - if 
(ctx->Stencil.WriteMask[0] || - ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) - lookup |= BRW_WM_IZ_STENCIL_WRITE_ENABLE_BIT; - } - key->iz_lookup = lookup; - } - - line_aa = BRW_WM_AA_NEVER; - - /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ - if (ctx->Line.SmoothFlag) { - if (brw->reduced_primitive == GL_LINES) { - line_aa = BRW_WM_AA_ALWAYS; - } - else if (brw->reduced_primitive == GL_TRIANGLES) { - if (ctx->Polygon.FrontMode == GL_LINE) { - line_aa = BRW_WM_AA_SOMETIMES; - - if (ctx->Polygon.BackMode == GL_LINE || - (ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_BACK)) - line_aa = BRW_WM_AA_ALWAYS; - } - else if (ctx->Polygon.BackMode == GL_LINE) { - line_aa = BRW_WM_AA_SOMETIMES; - - if ((ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_FRONT)) - line_aa = BRW_WM_AA_ALWAYS; - } - } - } - - key->line_aa = line_aa; - - /* _NEW_HINT */ - key->high_quality_derivatives = - prog->info.uses_fddx_fddy && - ctx->Hint.FragmentShaderDerivative == GL_NICEST; - - if (devinfo->ver < 6) - key->stats_wm = brw->stats_wm; - - /* _NEW_LIGHT */ - key->flat_shade = - (prog->info.inputs_read & (VARYING_BIT_COL0 | VARYING_BIT_COL1)) && - (ctx->Light.ShadeModel == GL_FLAT); - - /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ - key->clamp_fragment_color = ctx->Color._ClampFragmentColor; - - /* _NEW_TEXTURE */ - brw_populate_base_prog_key(ctx, fp, &key->base); - - /* _NEW_BUFFERS */ - key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers; - - /* _NEW_COLOR */ - key->force_dual_color_blend = brw->dual_color_blend_by_location && - (ctx->Color.BlendEnabled & 1) && ctx->Color._BlendUsesDualSrc & 0x1; - - /* _NEW_MULTISAMPLE, _NEW_BUFFERS */ - key->alpha_to_coverage = _mesa_is_alpha_to_coverage_enabled(ctx); - - /* _NEW_COLOR, _NEW_BUFFERS */ - key->alpha_test_replicate_alpha = - ctx->DrawBuffer->_NumColorDrawBuffers > 1 && - _mesa_is_alpha_test_enabled(ctx); - - /* _NEW_BUFFERS _NEW_MULTISAMPLE */ - /* Ignore sample qualifier while computing this flag. */ - if (ctx->Multisample.Enabled) { - key->persample_interp = - ctx->Multisample.SampleShading && - (ctx->Multisample.MinSampleShadingValue * - _mesa_geometric_samples(ctx->DrawBuffer) > 1); - - key->multisample_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; - } - - key->ignore_sample_mask_out = !key->multisample_fbo; - - /* BRW_NEW_VUE_MAP_GEOM_OUT */ - if (devinfo->ver < 6 || util_bitcount64(prog->info.inputs_read & - BRW_FS_VARYING_INPUT_MASK) > 16) { - key->input_slots_valid = brw->vue_map_geom_out.slots_valid; - } - - /* _NEW_COLOR | _NEW_BUFFERS */ - /* Pre-gfx6, the hardware alpha test always used each render - * target's alpha to do alpha test, as opposed to render target 0's alpha - * like GL requires. Fix that by building the alpha test into the - * shader, and we'll skip enabling the fixed function alpha test. - */ - if (devinfo->ver < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && - ctx->Color.AlphaEnabled) { - key->alpha_test_func = ctx->Color.AlphaFunc; - key->alpha_test_ref = ctx->Color.AlphaRef; - } - - /* Whether reads from the framebuffer should behave coherently. 
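The line_aa computation above is a small decision tree over line smoothing, the reduced primitive, the polygon modes, and culling. Restated as a self-contained function with stand-in enums (the logic mirrors the deleted code; the names are not the driver's):

    #include <stdbool.h>
    #include <stdio.h>

    enum aa_mode { AA_NEVER, AA_SOMETIMES, AA_ALWAYS };
    enum gl_mode { MODE_FILL, MODE_LINE };   /* stand-ins for GL_FILL/GL_LINE */
    enum prim    { PRIM_POINTS, PRIM_LINES, PRIM_TRIANGLES };
    enum face    { CULL_FRONT, CULL_BACK };

    static enum aa_mode
    compute_line_aa(bool smooth, enum prim reduced_prim,
                    enum gl_mode front, enum gl_mode back,
                    bool cull, enum face cull_face)
    {
       if (!smooth)
          return AA_NEVER;
       if (reduced_prim == PRIM_LINES)
          return AA_ALWAYS;                  /* real lines: always AA */
       if (reduced_prim != PRIM_TRIANGLES)
          return AA_NEVER;

       /* Triangles only produce AA lines when a face is drawn in line mode;
        * it becomes "always" once every rasterized face is a line. */
       if (front == MODE_LINE) {
          if (back == MODE_LINE || (cull && cull_face == CULL_BACK))
             return AA_ALWAYS;
          return AA_SOMETIMES;
       }
       if (back == MODE_LINE)
          return (cull && cull_face == CULL_FRONT) ? AA_ALWAYS : AA_SOMETIMES;
       return AA_NEVER;
    }

    int main(void)
    {
       /* Front faces in line mode, back faces culled: always AA (prints 2). */
       printf("%d\n", compute_line_aa(true, PRIM_TRIANGLES,
                                      MODE_LINE, MODE_FILL, true, CULL_BACK));
       return 0;
    }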
*/ - key->coherent_fb_fetch = ctx->Extensions.EXT_shader_framebuffer_fetch; -} - -void -brw_upload_wm_prog(struct brw_context *brw) -{ - struct brw_wm_prog_key key; - struct brw_program *fp = - (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT]; - - if (!brw_wm_state_dirty(brw)) - return; - - brw_wm_populate_key(brw, &key); - - if (brw_search_cache(&brw->cache, BRW_CACHE_FS_PROG, &key, sizeof(key), - &brw->wm.base.prog_offset, &brw->wm.base.prog_data, - true)) - return; - - if (brw_disk_cache_upload_program(brw, MESA_SHADER_FRAGMENT)) - return; - - fp = (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT]; - fp->id = key.base.program_string_id; - - ASSERTED bool success = brw_codegen_wm_prog(brw, fp, &key, - &brw->vue_map_geom_out); - assert(success); -} - -void -brw_wm_populate_default_key(const struct brw_compiler *compiler, - struct brw_wm_prog_key *key, - struct gl_program *prog) -{ - const struct intel_device_info *devinfo = compiler->devinfo; - - memset(key, 0, sizeof(*key)); - - brw_populate_default_base_prog_key(devinfo, brw_program(prog), - &key->base); - - uint64_t outputs_written = prog->info.outputs_written; - - if (devinfo->ver < 6) { - if (prog->info.fs.uses_discard) - key->iz_lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT; - - if (outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) - key->iz_lookup |= BRW_WM_IZ_PS_COMPUTES_DEPTH_BIT; - - /* Just assume depth testing. */ - key->iz_lookup |= BRW_WM_IZ_DEPTH_TEST_ENABLE_BIT; - key->iz_lookup |= BRW_WM_IZ_DEPTH_WRITE_ENABLE_BIT; - } - - if (devinfo->ver < 6 || util_bitcount64(prog->info.inputs_read & - BRW_FS_VARYING_INPUT_MASK) > 16) { - key->input_slots_valid = prog->info.inputs_read | VARYING_BIT_POS; - } - - key->nr_color_regions = util_bitcount64(outputs_written & - ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) | - BITFIELD64_BIT(FRAG_RESULT_STENCIL) | - BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))); - - /* Whether reads from the framebuffer should behave coherently. */ - key->coherent_fb_fetch = devinfo->ver >= 9; -} - -bool -brw_fs_precompile(struct gl_context *ctx, struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_wm_prog_key key; - - struct brw_program *bfp = brw_program(prog); - - brw_wm_populate_default_key(brw->screen->compiler, &key, prog); - - /* check brw_wm_populate_default_key coherent_fb_fetch setting */ - assert(key.coherent_fb_fetch == - ctx->Extensions.EXT_shader_framebuffer_fetch); - - uint32_t old_prog_offset = brw->wm.base.prog_offset; - struct brw_stage_prog_data *old_prog_data = brw->wm.base.prog_data; - - struct brw_vue_map vue_map; - if (devinfo->ver < 6) { - brw_compute_vue_map(&brw->screen->devinfo, &vue_map, - prog->info.inputs_read | VARYING_BIT_POS, - false, 1); - } - - bool success = brw_codegen_wm_prog(brw, bfp, &key, &vue_map); - - brw->wm.base.prog_offset = old_prog_offset; - brw->wm.base.prog_data = old_prog_data; - - return success; -} diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h deleted file mode 100644 index 86980c3..0000000 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. 
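brw_upload_wm_prog() above shows the lookup order every stage uses: bail out if no relevant state is dirty, then try the in-memory program cache, then the disk cache, and compile only on a full miss. A schematic of that control flow; all of the helpers here are stand-in stubs, not the driver's real API:

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in helpers: each returns true when it satisfied the request. */
    static bool state_dirty(void)         { return true;  }
    static bool search_memory_cache(void) { return false; }
    static bool search_disk_cache(void)   { return false; }
    static bool codegen(void)             { printf("compiling\n"); return true; }

    static void upload_prog(void)
    {
       if (!state_dirty())
          return;              /* nothing changed: keep the current program */
       if (search_memory_cache())
          return;              /* hit: prog_offset/prog_data now point at it */
       if (search_disk_cache())
          return;              /* hit: binary re-uploaded from the disk cache */
       bool ok = codegen();    /* miss everywhere: compile and cache */
       (void)ok;               /* the driver asserts success at this point */
    }

    int main(void) { upload_prog(); return 0; }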
- - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRW_WM_H -#define BRW_WM_H - -#include <stdbool.h> - -#include "brw_context.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void -brw_upload_wm_prog(struct brw_context *brw); - -void -brw_wm_populate_key(struct brw_context *brw, - struct brw_wm_prog_key *key); -void -brw_wm_populate_default_key(const struct brw_compiler *compiler, - struct brw_wm_prog_key *key, - struct gl_program *prog); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c deleted file mode 100644 index 08e9009..0000000 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ /dev/null @@ -1,1692 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "compiler/nir/nir.h" -#include "main/context.h" -#include "main/blend.h" -#include "main/mtypes.h" -#include "main/samplerobj.h" -#include "main/shaderimage.h" -#include "main/teximage.h" -#include "program/prog_parameter.h" -#include "program/prog_instruction.h" -#include "main/framebuffer.h" -#include "main/shaderapi.h" - -#include "isl/isl.h" - -#include "brw_mipmap_tree.h" -#include "brw_batch.h" -#include "brw_tex.h" -#include "brw_fbo.h" -#include "brw_buffer_objects.h" - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_wm.h" - -static void -get_isl_surf(struct brw_context *brw, struct brw_mipmap_tree *mt, - GLenum target, struct isl_view *view, - uint32_t *tile_x, uint32_t *tile_y, - uint32_t *offset, struct isl_surf *surf) -{ - *surf = mt->surf; - - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const enum isl_dim_layout dim_layout = - get_isl_dim_layout(devinfo, mt->surf.tiling, target); - - surf->dim = get_isl_surf_dim(target); - - if (surf->dim_layout == dim_layout) - return; - - /* The layout of the specified texture target is not compatible with the - * actual layout of the miptree structure in memory -- You're entering - * dangerous territory, this can only possibly work if you only intended - * to access a single level and slice of the texture, and the hardware - * supports the tile offset feature in order to allow non-tile-aligned - * base offsets, since we'll have to point the hardware to the first - * texel of the level instead of relying on the usual base level/layer - * controls. - */ - assert(devinfo->has_surface_tile_offset); - assert(view->levels == 1 && view->array_len == 1); - assert(*tile_x == 0 && *tile_y == 0); - - *offset += brw_miptree_get_tile_offsets(mt, view->base_level, - view->base_array_layer, - tile_x, tile_y); - - /* Minify the logical dimensions of the texture. */ - const unsigned l = view->base_level - mt->first_level; - surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l); - surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 : - minify(surf->logical_level0_px.height, l); - surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 : - minify(surf->logical_level0_px.depth, l); - - /* Only the base level and layer can be addressed with the overridden - * layout. - */ - surf->logical_level0_px.array_len = 1; - surf->levels = 1; - surf->dim_layout = dim_layout; - - /* The requested slice of the texture is now at the base level and - * layer. 
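The logical-dimension minification above is the usual mipmap rule: each level halves the previous one and never drops below one texel. A local restatement of the minify() helper the deleted code calls, with a few sanity checks:

    #include <assert.h>
    #include <stdio.h>

    static unsigned minify(unsigned base, unsigned level)
    {
       unsigned v = base >> level;   /* halve once per level ... */
       return v ? v : 1;             /* ... but never below one texel */
    }

    int main(void)
    {
       /* A 37-texel-wide level 0 gives levels of 37, 18, 9, 4, 2, 1, 1, ... */
       assert(minify(37, 1) == 18);
       assert(minify(37, 5) == 1);
       assert(minify(37, 9) == 1);
       printf("ok\n");
       return 0;
    }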
- */ - view->base_level = 0; - view->base_array_layer = 0; -} - -static void -brw_emit_surface_state(struct brw_context *brw, - struct brw_mipmap_tree *mt, - GLenum target, struct isl_view view, - enum isl_aux_usage aux_usage, - uint32_t *surf_offset, int surf_index, - unsigned reloc_flags) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - uint32_t tile_x = mt->level[0].level_x; - uint32_t tile_y = mt->level[0].level_y; - uint32_t offset = mt->offset; - - struct isl_surf surf; - - get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf); - - union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } }; - - struct brw_bo *aux_bo = NULL; - struct isl_surf *aux_surf = NULL; - uint64_t aux_offset = 0; - struct brw_bo *clear_bo = NULL; - uint64_t clear_offset = 0; - - if (aux_usage != ISL_AUX_USAGE_NONE) { - aux_surf = &mt->aux_buf->surf; - aux_bo = mt->aux_buf->bo; - aux_offset = mt->aux_buf->offset; - - /* We only really need a clear color if we also have an auxiliary - * surface. Without one, it does nothing. - */ - clear_color = brw_miptree_get_clear_color(mt, &clear_bo, &clear_offset); - } - - void *state = brw_state_batch(brw, - brw->isl_dev.ss.size, - brw->isl_dev.ss.align, - surf_offset); - - isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view, - .address = brw_state_reloc(&brw->batch, - *surf_offset + brw->isl_dev.ss.addr_offset, - mt->bo, offset, reloc_flags), - .aux_surf = aux_surf, .aux_usage = aux_usage, - .aux_address = aux_offset, - .mocs = brw_mocs(&brw->isl_dev, mt->bo), - .clear_color = clear_color, - .use_clear_address = clear_bo != NULL, - .clear_address = clear_offset, - .x_offset_sa = tile_x, .y_offset_sa = tile_y); - if (aux_surf) { - /* On gfx7 and prior, the upper 20 bits of surface state DWORD 6 are the - * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits - * contain other control information. Since buffer addresses are always - * on 4k boundaries (and thus have their lower 12 bits zero), we can use - * an ordinary reloc to do the necessary address translation. - * - * FIXME: move to the point of assignment. - */ - assert((aux_offset & 0xfff) == 0); - - if (devinfo->ver >= 8) { - uint64_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset; - *aux_addr = brw_state_reloc(&brw->batch, - *surf_offset + - brw->isl_dev.ss.aux_addr_offset, - aux_bo, *aux_addr, - reloc_flags); - } else { - uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset; - *aux_addr = brw_state_reloc(&brw->batch, - *surf_offset + - brw->isl_dev.ss.aux_addr_offset, - aux_bo, *aux_addr, - reloc_flags); - - } - } - - if (clear_bo != NULL) { - /* Make sure the offset is aligned with a cacheline. 
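The relocation trick described above works because the MCS address and the control bits share one dword: the aux buffer is 4 KiB aligned, so its low 12 bits are zero and the control information can simply be OR'd in. A sketch of that packing:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Pack a 4 KiB-aligned aux-buffer address together with 12 bits of
     * control information, as surface-state DWORD 6 does on gfx7 and prior. */
    static uint32_t pack_aux_dword(uint32_t aux_addr, uint32_t ctrl_bits)
    {
       assert((aux_addr & 0xfff) == 0);   /* address must be 4 KiB aligned */
       assert(ctrl_bits <= 0xfff);        /* control info fits in the low 12 bits */
       return aux_addr | ctrl_bits;       /* a plain OR: the fields never overlap */
    }

    int main(void)
    {
       uint32_t dw6 = pack_aux_dword(0x12345000u, 0x5au);
       printf("%#x\n", dw6);              /* prints 0x1234505a */
       return 0;
    }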
*/ - assert((clear_offset & 0x3f) == 0); - uint64_t *clear_address = - state + brw->isl_dev.ss.clear_color_state_offset; - *clear_address = brw_state_reloc(&brw->batch, - *surf_offset + - brw->isl_dev.ss.clear_color_state_offset, - clear_bo, *clear_address, reloc_flags); - } -} - -static uint32_t -gfx6_update_renderbuffer_surface(struct brw_context *brw, - struct gl_renderbuffer *rb, - unsigned unit, - uint32_t surf_index) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - struct brw_mipmap_tree *mt = irb->mt; - - assert(brw_render_target_supported(brw, rb)); - - mesa_format rb_format = _mesa_get_render_format(ctx, brw_rb_format(irb)); - if (unlikely(!brw->mesa_format_supports_render[rb_format])) { - _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", - __func__, _mesa_get_format_name(rb_format)); - } - enum isl_format isl_format = brw->mesa_to_isl_render_format[rb_format]; - - struct isl_view view = { - .format = isl_format, - .base_level = irb->mt_level - irb->mt->first_level, - .levels = 1, - .base_array_layer = irb->mt_layer, - .array_len = MAX2(irb->layer_count, 1), - .swizzle = ISL_SWIZZLE_IDENTITY, - .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT, - }; - - uint32_t offset; - brw_emit_surface_state(brw, mt, mt->target, view, - brw->draw_aux_usage[unit], - &offset, surf_index, - RELOC_WRITE); - return offset; -} - -GLuint -translate_tex_target(GLenum target) -{ - switch (target) { - case GL_TEXTURE_1D: - case GL_TEXTURE_1D_ARRAY_EXT: - return BRW_SURFACE_1D; - - case GL_TEXTURE_RECTANGLE_NV: - return BRW_SURFACE_2D; - - case GL_TEXTURE_2D: - case GL_TEXTURE_2D_ARRAY_EXT: - case GL_TEXTURE_EXTERNAL_OES: - case GL_TEXTURE_2D_MULTISAMPLE: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - return BRW_SURFACE_2D; - - case GL_TEXTURE_3D: - return BRW_SURFACE_3D; - - case GL_TEXTURE_CUBE_MAP: - case GL_TEXTURE_CUBE_MAP_ARRAY: - return BRW_SURFACE_CUBE; - - default: - unreachable("not reached"); - } -} - -uint32_t -brw_get_surface_tiling_bits(enum isl_tiling tiling) -{ - switch (tiling) { - case ISL_TILING_X: - return BRW_SURFACE_TILED; - case ISL_TILING_Y0: - return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y; - default: - return 0; - } -} - - -uint32_t -brw_get_surface_num_multisamples(unsigned num_samples) -{ - if (num_samples > 1) - return BRW_SURFACE_MULTISAMPLECOUNT_4; - else - return BRW_SURFACE_MULTISAMPLECOUNT_1; -} - -/** - * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle - * swizzling. - */ -int -brw_get_texture_swizzle(const struct gl_context *ctx, - const struct gl_texture_object *t) -{ - const struct gl_texture_image *img = t->Image[0][t->Attrib.BaseLevel]; - - int swizzles[SWIZZLE_NIL + 1] = { - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - SWIZZLE_ZERO, - SWIZZLE_ONE, - SWIZZLE_NIL - }; - - if (img->_BaseFormat == GL_DEPTH_COMPONENT || - img->_BaseFormat == GL_DEPTH_STENCIL) { - GLenum depth_mode = t->Attrib.DepthMode; - - /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures - * with depth component data specified with a sized internal format. - * Otherwise, it's left at the old default, GL_LUMINANCE. 
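Summarizing the DEPTH_TEXTURE_MODE handling in brw_get_texture_swizzle(): the single depth value, read into X, fans out to RGBA according to the depth mode, and the result is packed three bits per channel. A compact restatement; the selector codes and packing macros below are re-declared to match their definitions in Mesa's prog_instruction.h:

    #include <stdio.h>

    /* Per-channel selector codes and the 3-bit packing. */
    enum { SWZ_X = 0, SWZ_Y = 1, SWZ_Z = 2, SWZ_W = 3, SWZ_ZERO = 4, SWZ_ONE = 5 };
    #define MAKE_SWIZZLE4(a, b, c, d) ((a) | ((b) << 3) | ((c) << 6) | ((d) << 9))
    #define GET_SWZ(swz, i)           (((swz) >> ((i) * 3)) & 0x7)

    /* DEPTH_TEXTURE_MODE: how the one depth value (in X) fans out to RGBA. */
    static unsigned
    depth_mode_swizzle(int mode /* 0=ALPHA 1=LUMINANCE 2=INTENSITY 3=RED */)
    {
       switch (mode) {
       case 0:  return MAKE_SWIZZLE4(SWZ_ZERO, SWZ_ZERO, SWZ_ZERO, SWZ_X);
       case 1:  return MAKE_SWIZZLE4(SWZ_X,    SWZ_X,    SWZ_X,    SWZ_ONE);
       case 2:  return MAKE_SWIZZLE4(SWZ_X,    SWZ_X,    SWZ_X,    SWZ_X);
       default: return MAKE_SWIZZLE4(SWZ_X,    SWZ_ZERO, SWZ_ZERO, SWZ_ONE);
       }
    }

    int main(void)
    {
       unsigned swz = depth_mode_swizzle(1);   /* GL_LUMINANCE */
       for (int i = 0; i < 4; i++)
          printf("channel %d -> code %d\n", i, GET_SWZ(swz, i));
       return 0;
    }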
- */ - if (_mesa_is_gles3(ctx) && - img->InternalFormat != GL_DEPTH_COMPONENT && - img->InternalFormat != GL_DEPTH_STENCIL) { - depth_mode = GL_RED; - } - - switch (depth_mode) { - case GL_ALPHA: - swizzles[0] = SWIZZLE_ZERO; - swizzles[1] = SWIZZLE_ZERO; - swizzles[2] = SWIZZLE_ZERO; - swizzles[3] = SWIZZLE_X; - break; - case GL_LUMINANCE: - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_X; - swizzles[2] = SWIZZLE_X; - swizzles[3] = SWIZZLE_ONE; - break; - case GL_INTENSITY: - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_X; - swizzles[2] = SWIZZLE_X; - swizzles[3] = SWIZZLE_X; - break; - case GL_RED: - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_ZERO; - swizzles[2] = SWIZZLE_ZERO; - swizzles[3] = SWIZZLE_ONE; - break; - } - } - - GLenum datatype = _mesa_get_format_datatype(img->TexFormat); - - /* If the texture's format is alpha-only, force R, G, and B to - * 0.0. Similarly, if the texture's format has no alpha channel, - * force the alpha value read to 1.0. This allows for the - * implementation to use an RGBA texture for any of these formats - * without leaking any unexpected values. - */ - switch (img->_BaseFormat) { - case GL_ALPHA: - swizzles[0] = SWIZZLE_ZERO; - swizzles[1] = SWIZZLE_ZERO; - swizzles[2] = SWIZZLE_ZERO; - break; - case GL_LUMINANCE: - if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) { - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_X; - swizzles[2] = SWIZZLE_X; - swizzles[3] = SWIZZLE_ONE; - } - break; - case GL_LUMINANCE_ALPHA: - if (datatype == GL_SIGNED_NORMALIZED) { - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_X; - swizzles[2] = SWIZZLE_X; - swizzles[3] = SWIZZLE_W; - } - break; - case GL_INTENSITY: - if (datatype == GL_SIGNED_NORMALIZED) { - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_X; - swizzles[2] = SWIZZLE_X; - swizzles[3] = SWIZZLE_X; - } - break; - case GL_RED: - if (img->TexFormat == MESA_FORMAT_R_SRGB8) { - swizzles[0] = SWIZZLE_X; - swizzles[1] = SWIZZLE_ZERO; - swizzles[2] = SWIZZLE_ZERO; - swizzles[3] = SWIZZLE_ONE; - break; - } - FALLTHROUGH; - case GL_RG: - case GL_RGB: - if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 || - img->TexFormat == MESA_FORMAT_RGB_DXT1 || - img->TexFormat == MESA_FORMAT_SRGB_DXT1) - swizzles[3] = SWIZZLE_ONE; - break; - } - - return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->Attrib._Swizzle, 0)], - swizzles[GET_SWZ(t->Attrib._Swizzle, 1)], - swizzles[GET_SWZ(t->Attrib._Swizzle, 2)], - swizzles[GET_SWZ(t->Attrib._Swizzle, 3)]); -} - -/** - * Convert a swizzle enumeration (e.g. SWIZZLE_X) to one of the Gfx7.5+ - * "Shader Channel Select" enumerations (e.g. HSW_SCS_RED). The mappings are - * - * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE - * 0 1 2 3 4 5 - * 4 5 6 7 0 1 - * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE - * - * which is simply adding 4 then modding by 8 (or anding with 7). - * - * We then may need to apply workarounds for textureGather hardware bugs. - */ -static unsigned -swizzle_to_scs(GLenum swizzle, bool need_green_to_blue) -{ - unsigned scs = (swizzle + 4) & 7; - - return (need_green_to_blue && scs == HSW_SCS_GREEN) ?
HSW_SCS_BLUE : scs; -} - -static void brw_update_texture_surface(struct gl_context *ctx, - unsigned unit, - uint32_t *surf_offset, - bool for_gather, - bool for_txf, - uint32_t plane) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current; - - if (obj->Target == GL_TEXTURE_BUFFER) { - brw_update_buffer_texture_surface(ctx, unit, surf_offset); - - } else { - struct brw_texture_object *intel_obj = brw_texture_object(obj); - struct brw_mipmap_tree *mt = intel_obj->mt; - - if (plane > 0) { - if (mt->plane[plane - 1] == NULL) - return; - mt = mt->plane[plane - 1]; - } - - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); - /* If this is a view with restricted NumLayers, then our effective depth - * is not just the miptree depth. - */ - unsigned view_num_layers; - if (obj->Immutable && obj->Target != GL_TEXTURE_3D) { - view_num_layers = obj->Attrib.NumLayers; - } else { - view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ? - mt->surf.logical_level0_px.depth : - mt->surf.logical_level0_px.array_len; - } - - /* Handling GL_ALPHA as a surface format override breaks 1.30+ style - * texturing functions that return a float, as our code generation always - * selects the .x channel (which would always be 0). - */ - struct gl_texture_image *firstImage = obj->Image[0][obj->Attrib.BaseLevel]; - const bool alpha_depth = obj->Attrib.DepthMode == GL_ALPHA && - (firstImage->_BaseFormat == GL_DEPTH_COMPONENT || - firstImage->_BaseFormat == GL_DEPTH_STENCIL); - const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW : - brw_get_texture_swizzle(&brw->ctx, obj)); - - mesa_format mesa_fmt; - if (firstImage->_BaseFormat == GL_DEPTH_STENCIL || - firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { - /* The format from intel_obj may be a combined depth stencil format - * when we just want depth. Pull it from the miptree instead. This - * is safe because texture views aren't allowed on depth/stencil. - */ - mesa_fmt = mt->format; - } else if (brw_miptree_has_etc_shadow(brw, mt)) { - mesa_fmt = mt->shadow_mt->format; - } else if (plane > 0) { - mesa_fmt = mt->format; - } else { - mesa_fmt = intel_obj->_Format; - } - enum isl_format format = translate_tex_format(brw, mesa_fmt, - for_txf ? GL_DECODE_EXT : - sampler->Attrib.sRGBDecode); - - /* Implement gfx6 and gfx7 gather work-around */ - bool need_green_to_blue = false; - if (for_gather) { - if (devinfo->ver == 7 && (format == ISL_FORMAT_R32G32_FLOAT || - format == ISL_FORMAT_R32G32_SINT || - format == ISL_FORMAT_R32G32_UINT)) { - format = ISL_FORMAT_R32G32_FLOAT_LD; - need_green_to_blue = devinfo->platform == INTEL_PLATFORM_HSW; - } else if (devinfo->ver == 6) { - /* Sandybridge's gather4 message is broken for integer formats. - * To work around this, we pretend the surface is UNORM for - * 8 or 16-bit formats, and emit shader instructions to recover - * the real INT/UINT value. For 32-bit formats, we pretend - * the surface is FLOAT, and simply reinterpret the resulting - * bits. 
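The (swizzle + 4) & 7 trick in swizzle_to_scs() above works because the shader-channel-select encodings are the swizzle encodings rotated by four, exactly as the mapping table in the deleted comment shows. A quick standalone check of that claim (the SCS_* values follow that table):

    #include <assert.h>
    #include <stdio.h>

    enum { SWZ_X, SWZ_Y, SWZ_Z, SWZ_W, SWZ_ZERO, SWZ_ONE };      /* 0..5 */
    enum { SCS_ZERO = 0, SCS_ONE = 1,
           SCS_RED = 4, SCS_GREEN = 5, SCS_BLUE = 6, SCS_ALPHA = 7 };

    static unsigned swizzle_to_scs(unsigned swizzle)
    {
       return (swizzle + 4) & 7;   /* add 4, wrap modulo 8 */
    }

    int main(void)
    {
       assert(swizzle_to_scs(SWZ_X)    == SCS_RED);
       assert(swizzle_to_scs(SWZ_W)    == SCS_ALPHA);
       assert(swizzle_to_scs(SWZ_ZERO) == SCS_ZERO);
       assert(swizzle_to_scs(SWZ_ONE)  == SCS_ONE);
       printf("mapping holds\n");
       return 0;
    }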
- */ - switch (format) { - case ISL_FORMAT_R8_SINT: - case ISL_FORMAT_R8_UINT: - format = ISL_FORMAT_R8_UNORM; - break; - - case ISL_FORMAT_R16_SINT: - case ISL_FORMAT_R16_UINT: - format = ISL_FORMAT_R16_UNORM; - break; - - case ISL_FORMAT_R32_SINT: - case ISL_FORMAT_R32_UINT: - format = ISL_FORMAT_R32_FLOAT; - break; - - default: - break; - } - } - } - - if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) { - if (devinfo->ver <= 7) { - assert(mt->shadow_mt && !mt->stencil_mt->shadow_needs_update); - mt = mt->shadow_mt; - } else { - mt = mt->stencil_mt; - } - format = ISL_FORMAT_R8_UINT; - } else if (devinfo->ver <= 7 && mt->format == MESA_FORMAT_S_UINT8) { - assert(mt->shadow_mt && !mt->shadow_needs_update); - mt = mt->shadow_mt; - format = ISL_FORMAT_R8_UINT; - } else if (brw_miptree_needs_fake_etc(brw, mt)) { - assert(mt->shadow_mt && !mt->shadow_needs_update); - mt = mt->shadow_mt; - } - - const int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; - - struct isl_view view = { - .format = format, - .base_level = obj->Attrib.MinLevel + obj->Attrib.BaseLevel, - .levels = intel_obj->_MaxLevel - obj->Attrib.BaseLevel + 1, - .base_array_layer = obj->Attrib.MinLayer, - .array_len = view_num_layers, - .swizzle = { - .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue), - .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue), - .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue), - .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue), - }, - .usage = ISL_SURF_USAGE_TEXTURE_BIT, - }; - - /* On Ivy Bridge and earlier, we handle texture swizzle with shader - * code. The actual surface swizzle should be identity. - */ - if (devinfo->verx10 <= 70) - view.swizzle = ISL_SWIZZLE_IDENTITY; - - if (obj->Target == GL_TEXTURE_CUBE_MAP || - obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) - view.usage |= ISL_SURF_USAGE_CUBE_BIT; - - enum isl_aux_usage aux_usage = - brw_miptree_texture_aux_usage(brw, mt, format, - brw->gfx9_astc5x5_wa_tex_mask); - - brw_emit_surface_state(brw, mt, mt->target, view, aux_usage, - surf_offset, surf_index, - 0); - } -} - -void -brw_emit_buffer_surface_state(struct brw_context *brw, - uint32_t *out_offset, - struct brw_bo *bo, - unsigned buffer_offset, - enum isl_format format, - unsigned buffer_size, - unsigned pitch, - unsigned reloc_flags) -{ - uint32_t *dw = brw_state_batch(brw, - brw->isl_dev.ss.size, - brw->isl_dev.ss.align, - out_offset); - - isl_buffer_fill_state(&brw->isl_dev, dw, - .address = !bo ? buffer_offset : - brw_state_reloc(&brw->batch, - *out_offset + brw->isl_dev.ss.addr_offset, - bo, buffer_offset, - reloc_flags), - .size_B = buffer_size, - .format = format, - .swizzle = ISL_SWIZZLE_IDENTITY, - .stride_B = pitch, - .mocs = brw_mocs(&brw->isl_dev, bo)); -} - -static unsigned -buffer_texture_range_size(struct brw_context *brw, - struct gl_texture_object *obj) -{ - assert(obj->Target == GL_TEXTURE_BUFFER); - const unsigned texel_size = _mesa_get_format_bytes(obj->_BufferObjectFormat); - const unsigned buffer_size = (!obj->BufferObject ? 0 : - obj->BufferObject->Size); - const unsigned buffer_offset = MIN2(buffer_size, obj->BufferOffset); - - /* The ARB_texture_buffer_specification says: - * - * "The number of texels in the buffer texture's texel array is given by - * - * floor(<buffer_size> / (<components> * sizeof(<base_type>))), - * - * where <buffer_size> is the size of the buffer object, in basic - * machine units and <components> and <base_type> are the element count - * and base data type for elements, as specified in Table X.1.
The - * number of texels in the texel array is then clamped to the - * implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB." - * - * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride, - * so that when ISL divides by stride to obtain the number of texels, that - * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE. - */ - return MIN3((unsigned)obj->BufferSize, - buffer_size - buffer_offset, - brw->ctx.Const.MaxTextureBufferSize * texel_size); -} - -static void -emit_null_surface_state(struct brw_context *brw, - const struct gl_framebuffer *fb, - uint32_t *out_offset); - -void -brw_update_buffer_texture_surface(struct gl_context *ctx, - unsigned unit, - uint32_t *surf_offset) -{ - struct brw_context *brw = brw_context(ctx); - struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; - struct brw_buffer_object *intel_obj = - brw_buffer_object(tObj->BufferObject); - const unsigned size = buffer_texture_range_size(brw, tObj); - struct brw_bo *bo = NULL; - mesa_format format = tObj->_BufferObjectFormat; - const enum isl_format isl_format = brw_isl_format_for_mesa_format(format); - int texel_size = _mesa_get_format_bytes(format); - - if (tObj->BufferObject == NULL) { - emit_null_surface_state(brw, NULL, surf_offset); - return; - } - - if (intel_obj) - bo = brw_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size, - false); - - if (isl_format == ISL_FORMAT_UNSUPPORTED) { - _mesa_problem(NULL, "bad format %s for texture buffer\n", - _mesa_get_format_name(format)); - } - - brw_emit_buffer_surface_state(brw, surf_offset, bo, - tObj->BufferOffset, - isl_format, - size, - texel_size, - 0); -} - -/** - * Set up a binding table entry for use by stream output logic (transform - * feedback). - * - * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES. - */ -void -brw_update_sol_surface(struct brw_context *brw, - struct gl_buffer_object *buffer_obj, - uint32_t *out_offset, unsigned num_vector_components, - unsigned stride_dwords, unsigned offset_dwords) -{ - struct brw_buffer_object *intel_bo = brw_buffer_object(buffer_obj); - uint32_t offset_bytes = 4 * offset_dwords; - struct brw_bo *bo = brw_bufferobj_buffer(brw, intel_bo, - offset_bytes, - buffer_obj->Size - offset_bytes, - true); - uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset); - uint32_t pitch_minus_1 = 4*stride_dwords - 1; - size_t size_dwords = buffer_obj->Size / 4; - uint32_t buffer_size_minus_1, width, height, depth, surface_format; - - /* FIXME: can we rely on core Mesa to ensure that the buffer isn't - * too big to map using a single binding table entry? - */ - assert((size_dwords - offset_dwords) / stride_dwords - <= BRW_MAX_NUM_BUFFER_ENTRIES); - - if (size_dwords > offset_dwords + num_vector_components) { - /* There is room for at least 1 transform feedback output in the buffer. - * Compute the number of additional transform feedback outputs the - * buffer has room for. - */ - buffer_size_minus_1 = - (size_dwords - offset_dwords - num_vector_components) / stride_dwords; - } else { - /* There isn't even room for a single transform feedback output in the - * buffer. We can't configure the binding table entry to prevent output - * entirely; we'll have to rely on the geometry shader to detect - * overflow. But to minimize the damage in case of a bug, set up the - * binding table entry to just allow a single output. 
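The texel-count clamp in buffer_texture_range_size() above reduces to a three-way minimum in bytes, so that once ISL divides by the texel stride the resulting texel count respects MAX_TEXTURE_BUFFER_SIZE. A standalone sketch with hypothetical parameter names:

    #include <stdio.h>

    #define MIN2(a, b)    ((a) < (b) ? (a) : (b))
    #define MIN3(a, b, c) MIN2(MIN2(a, b), (c))

    /* Clamp a buffer texture's range in bytes so that, after dividing by the
     * texel stride, the texel count respects MAX_TEXTURE_BUFFER_SIZE. */
    static unsigned
    range_size(unsigned tex_buffer_size,   /* glTexBufferRange size, in bytes  */
               unsigned bytes_past_offset, /* buffer size minus buffer offset  */
               unsigned max_texels, unsigned texel_size)
    {
       return MIN3(tex_buffer_size, bytes_past_offset, max_texels * texel_size);
    }

    int main(void)
    {
       /* 1M texels of RGBA32F (16 bytes each) would be allowed, but only
        * 4096 bytes remain in the buffer object: the buffer tail wins. */
       printf("%u\n", range_size(1u << 20, 4096, 1u << 20, 16));
       return 0;
    }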
- */ - buffer_size_minus_1 = 0; - } - width = buffer_size_minus_1 & 0x7f; - height = (buffer_size_minus_1 & 0xfff80) >> 7; - depth = (buffer_size_minus_1 & 0x7f00000) >> 20; - - switch (num_vector_components) { - case 1: - surface_format = ISL_FORMAT_R32_FLOAT; - break; - case 2: - surface_format = ISL_FORMAT_R32G32_FLOAT; - break; - case 3: - surface_format = ISL_FORMAT_R32G32B32_FLOAT; - break; - case 4: - surface_format = ISL_FORMAT_R32G32B32A32_FLOAT; - break; - default: - unreachable("Invalid vector size for transform feedback output"); - } - - surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | - BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | - surface_format << BRW_SURFACE_FORMAT_SHIFT | - BRW_SURFACE_RC_READ_WRITE; - surf[1] = brw_state_reloc(&brw->batch, - *out_offset + 4, bo, offset_bytes, RELOC_WRITE); - surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT | - height << BRW_SURFACE_HEIGHT_SHIFT); - surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT | - pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); - surf[4] = 0; - surf[5] = 0; -} - -/* Creates a new WM constant buffer reflecting the current fragment program's - * constants, if needed by the fragment program. - * - * Otherwise, constants go through the CURBEs using the brw_constant_buffer - * state atom. - */ -static void -brw_upload_wm_pull_constants(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->wm.base; - /* BRW_NEW_FRAGMENT_PROGRAM */ - struct brw_program *fp = - (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT]; - - /* BRW_NEW_FS_PROG_DATA */ - struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data; - - _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT); - /* _NEW_PROGRAM_CONSTANTS */ - brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program, - stage_state, prog_data); -} - -const struct brw_tracked_state brw_wm_pull_constants = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_FS_PROG_DATA, - }, - .emit = brw_upload_wm_pull_constants, -}; - -/** - * Creates a null renderbuffer surface. - * - * This is used when the shader doesn't write to any color output. An FB - * write to target 0 will still be emitted, because that's how the thread is - * terminated (and computed depth is returned), so we need to have the - * hardware discard the target 0 color output. - */ -static void -emit_null_surface_state(struct brw_context *brw, - const struct gl_framebuffer *fb, - uint32_t *out_offset) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - uint32_t *surf = brw_state_batch(brw, - brw->isl_dev.ss.size, - brw->isl_dev.ss.align, - out_offset); - - /* Use the fb dimensions or 1x1x1 */ - const unsigned width = fb ? _mesa_geometric_width(fb) : 1; - const unsigned height = fb ? _mesa_geometric_height(fb) : 1; - const unsigned samples = fb ? _mesa_geometric_samples(fb) : 1; - - if (devinfo->ver != 6 || samples <= 1) { - isl_null_fill_state(&brw->isl_dev, surf, - .size = isl_extent3d(width, height, 1)); - return; - } - - /* On Gfx6, null render targets seem to cause GPU hangs when multisampling. - * So work around this problem by rendering into a dummy color buffer. - * - * To decrease the amount of memory needed by the workaround buffer, we - * set its pitch to 128 bytes (the width of a Y tile). This means that - * the amount of memory needed for the workaround buffer is - * (width_in_tiles + height_in_tiles - 1) tiles.
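The width/height/depth split at the top of this hunk packs a 27-bit "entries minus one" count into the 7-, 13-, and 7-bit fields of a SURFTYPE_BUFFER surface. A standalone restatement with a round-trip check:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Split a 27-bit "entries minus one" count across the 7-bit width,
     * 13-bit height, and 7-bit depth fields of a SURFTYPE_BUFFER surface,
     * as brw_update_sol_surface() does. */
    static void
    split_buffer_size(uint32_t size_minus_1,
                      uint32_t *width, uint32_t *height, uint32_t *depth)
    {
       assert(size_minus_1 < (1u << 27));
       *width  = size_minus_1 & 0x7f;              /* bits  6:0  */
       *height = (size_minus_1 & 0xfff80) >> 7;    /* bits 19:7  */
       *depth  = (size_minus_1 & 0x7f00000) >> 20; /* bits 26:20 */
    }

    int main(void)
    {
       uint32_t w, h, d;
       split_buffer_size(0x123456f, &w, &h, &d);
       /* Recombining the fields must give back the original value. */
       assert((d << 20 | h << 7 | w) == 0x123456f);
       printf("w=%u h=%u d=%u\n", w, h, d);
       return 0;
    }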
- * - * Note that since the workaround buffer will be interpreted by the - * hardware as an interleaved multisampled buffer, we need to compute - * width_in_tiles and height_in_tiles by dividing the width and height - * by 16 rather than the normal Y-tile size of 32. - */ - unsigned width_in_tiles = ALIGN(width, 16) / 16; - unsigned height_in_tiles = ALIGN(height, 16) / 16; - unsigned pitch_minus_1 = 127; - unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096; - brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo, - size_needed); - - surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | - ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT); - surf[1] = brw_state_reloc(&brw->batch, *out_offset + 4, - brw->wm.multisampled_null_render_target_bo, - 0, RELOC_WRITE); - - surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT | - (height - 1) << BRW_SURFACE_HEIGHT_SHIFT); - - /* From Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming - * Notes): - * - * If Surface Type is SURFTYPE_NULL, this field must be TRUE - */ - surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y | - pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); - surf[4] = BRW_SURFACE_MULTISAMPLECOUNT_4; - surf[5] = 0; -} - -/** - * Sets up a surface state structure to point at the given region. - * While it is only used for the front/back buffer currently, it should be - * usable for further buffers when doing ARB_draw_buffers support. - */ -static uint32_t -gfx4_update_renderbuffer_surface(struct brw_context *brw, - struct gl_renderbuffer *rb, - unsigned unit, - uint32_t surf_index) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - struct brw_renderbuffer *irb = brw_renderbuffer(rb); - struct brw_mipmap_tree *mt = irb->mt; - uint32_t *surf; - uint32_t tile_x, tile_y; - enum isl_format format; - uint32_t offset; - /* _NEW_BUFFERS */ - mesa_format rb_format = _mesa_get_render_format(ctx, brw_rb_format(irb)); - /* BRW_NEW_FS_PROG_DATA */ - - if (rb->TexImage && !devinfo->has_surface_tile_offset) { - brw_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y); - - if (tile_x != 0 || tile_y != 0) { - /* Original gfx4 hardware couldn't draw to a non-tile-aligned - * destination in a miptree unless you actually set up your renderbuffer - * as a miptree and used the fragile lod/array_index/etc. controls to - * select the image. So, instead, we just make a new single-level - * miptree and render into that.
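The workaround-buffer sizing above follows from the 128-byte pitch: the dummy buffer needs at most (width_in_tiles + height_in_tiles - 1) 4 KiB tiles, with the interleaved MSAA layout making each tile cover 16x16 pixels for this purpose. A standalone restatement of that computation:

    #include <stdio.h>

    #define ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))

    /* Size of the dummy buffer for the Gfx6 multisampled-null-RT workaround:
     * with a 128-byte pitch, the worst case is
     * (width_in_tiles + height_in_tiles - 1) tiles of 4 KiB each, where a
     * "tile" covers 16x16 pixels under the interleaved MSAA layout. */
    static unsigned workaround_bo_size(unsigned width, unsigned height)
    {
       unsigned width_in_tiles  = ALIGN(width, 16) / 16;
       unsigned height_in_tiles = ALIGN(height, 16) / 16;
       return (width_in_tiles + height_in_tiles - 1) * 4096;
    }

    int main(void)
    {
       /* A 1920x1080 framebuffer needs (120 + 68 - 1) tiles, i.e. 748 KiB. */
       printf("%u bytes\n", workaround_bo_size(1920, 1080));
       return 0;
    }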
- */ - brw_renderbuffer_move_to_temp(brw, irb, false); - assert(irb->align_wa_mt); - mt = irb->align_wa_mt; - } - } - - surf = brw_state_batch(brw, 6 * 4, 32, &offset); - - format = brw->mesa_to_isl_render_format[rb_format]; - if (unlikely(!brw->mesa_format_supports_render[rb_format])) { - _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", - __func__, _mesa_get_format_name(rb_format)); - } - - surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | - format << BRW_SURFACE_FORMAT_SHIFT); - - /* reloc */ - assert(mt->offset % mt->cpp == 0); - surf[1] = brw_state_reloc(&brw->batch, offset + 4, mt->bo, - mt->offset + - brw_renderbuffer_get_tile_offsets(irb, - &tile_x, - &tile_y), - RELOC_WRITE); - - surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT | - (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT); - - surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) | - (mt->surf.row_pitch_B - 1) << BRW_SURFACE_PITCH_SHIFT); - - surf[4] = brw_get_surface_num_multisamples(mt->surf.samples); - - assert(devinfo->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); - /* Note that the low bits of these fields are missing, so - * there's the possibility of getting in trouble. - */ - assert(tile_x % 4 == 0); - assert(tile_y % 2 == 0); - surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT | - (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT | - (mt->surf.image_alignment_el.height == 4 ? - BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0)); - - if (devinfo->ver < 6) { - /* _NEW_COLOR */ - if (!ctx->Color.ColorLogicOpEnabled && - ctx->Color._AdvancedBlendMode == BLEND_NONE && - (ctx->Color.BlendEnabled & (1 << unit))) - surf[0] |= BRW_SURFACE_BLEND_ENABLED; - - if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 0)) - surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT; - if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 1)) - surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT; - if (!GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 2)) - surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT; - - /* As mentioned above, disable writes to the alpha component when the - * renderbuffer is XRGB. - */ - if (ctx->DrawBuffer->Visual.alphaBits == 0 || - !GET_COLORMASK_BIT(ctx->Color.ColorMask, unit, 3)) { - surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT; - } - } - - return offset; -} - -static void -update_renderbuffer_surfaces(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct gl_context *ctx = &brw->ctx; - - /* _NEW_BUFFERS | _NEW_COLOR */ - const struct gl_framebuffer *fb = ctx->DrawBuffer; - - /* Render targets always start at binding table index 0. */ - const unsigned rt_start = 0; - - uint32_t *surf_offsets = brw->wm.base.surf_offset; - - /* Update surfaces for drawing buffers */ - if (fb->_NumColorDrawBuffers >= 1) { - for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { - struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i]; - - if (brw_renderbuffer(rb)) { - surf_offsets[rt_start + i] = devinfo->ver >= 6 ? - gfx6_update_renderbuffer_surface(brw, rb, i, rt_start + i) : - gfx4_update_renderbuffer_surface(brw, rb, i, rt_start + i); - } else { - emit_null_surface_state(brw, fb, &surf_offsets[rt_start + i]); - } - } - } else { - emit_null_surface_state(brw, fb, &surf_offsets[rt_start]); - } - - /* The PIPE_CONTROL command description says: - * - * "Whenever a Binding Table Index (BTI) used by a Render Taget Message - * points to a different RENDER_SURFACE_STATE, SW must issue a Render - * Target Cache Flush by enabling this bit. 
When render target flush - * is set due to new association of BTI, PS Scoreboard Stall bit must - * be set in this packet." - */ - if (devinfo->ver >= 11) { - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - } - - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; -} - -const struct brw_tracked_state brw_renderbuffer_surfaces = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_COLOR, - .brw = BRW_NEW_BATCH, - }, - .emit = update_renderbuffer_surfaces, -}; - -const struct brw_tracked_state gfx6_renderbuffer_surfaces = { - .dirty = { - .mesa = _NEW_BUFFERS, - .brw = BRW_NEW_BATCH | - BRW_NEW_AUX_STATE, - }, - .emit = update_renderbuffer_surfaces, -}; - -static void -update_renderbuffer_read_surfaces(struct brw_context *brw) -{ - const struct gl_context *ctx = &brw->ctx; - - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - - if (wm_prog_data->has_render_target_reads && - !ctx->Extensions.EXT_shader_framebuffer_fetch) { - /* _NEW_BUFFERS */ - const struct gl_framebuffer *fb = ctx->DrawBuffer; - - for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) { - struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i]; - const struct brw_renderbuffer *irb = brw_renderbuffer(rb); - const unsigned surf_index = - wm_prog_data->binding_table.render_target_read_start + i; - uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index]; - - if (irb) { - const enum isl_format format = brw->mesa_to_isl_render_format[ - _mesa_get_render_format(ctx, brw_rb_format(irb))]; - assert(isl_format_supports_sampling(&brw->screen->devinfo, - format)); - - /* Override the target of the texture if the render buffer is a - * single slice of a 3D texture (since the minimum array element - * field of the surface state structure is ignored by the sampler - * unit for 3D textures on some hardware), or if the render buffer - * is a 1D array (since shaders always provide the array index - * coordinate at the Z component to avoid state-dependent - * recompiles when changing the texture target of the - * framebuffer). - */ - const GLenum target = - (irb->mt->target == GL_TEXTURE_3D && - irb->layer_count == 1) ? GL_TEXTURE_2D : - irb->mt->target == GL_TEXTURE_1D_ARRAY ? 
GL_TEXTURE_2D_ARRAY : - irb->mt->target; - - const struct isl_view view = { - .format = format, - .base_level = irb->mt_level - irb->mt->first_level, - .levels = 1, - .base_array_layer = irb->mt_layer, - .array_len = irb->layer_count, - .swizzle = ISL_SWIZZLE_IDENTITY, - .usage = ISL_SURF_USAGE_TEXTURE_BIT, - }; - - enum isl_aux_usage aux_usage = - brw_miptree_texture_aux_usage(brw, irb->mt, format, - brw->gfx9_astc5x5_wa_tex_mask); - if (brw->draw_aux_usage[i] == ISL_AUX_USAGE_NONE) - aux_usage = ISL_AUX_USAGE_NONE; - - brw_emit_surface_state(brw, irb->mt, target, view, aux_usage, - surf_offset, surf_index, - 0); - - } else { - emit_null_surface_state(brw, fb, surf_offset); - } - } - - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; - } -} - -const struct brw_tracked_state brw_renderbuffer_read_surfaces = { - .dirty = { - .mesa = _NEW_BUFFERS, - .brw = BRW_NEW_BATCH | - BRW_NEW_AUX_STATE | - BRW_NEW_FS_PROG_DATA, - }, - .emit = update_renderbuffer_read_surfaces, -}; - -static bool -is_depth_texture(struct brw_texture_object *iobj) -{ - GLenum base_format = _mesa_get_format_base_format(iobj->_Format); - return base_format == GL_DEPTH_COMPONENT || - (base_format == GL_DEPTH_STENCIL && !iobj->base.StencilSampling); -} - -static void -update_stage_texture_surfaces(struct brw_context *brw, - const struct gl_program *prog, - struct brw_stage_state *stage_state, - bool for_gather, uint32_t plane) -{ - if (!prog) - return; - - struct gl_context *ctx = &brw->ctx; - - uint32_t *surf_offset = stage_state->surf_offset; - - /* BRW_NEW_*_PROG_DATA */ - if (for_gather) - surf_offset += stage_state->prog_data->binding_table.gather_texture_start; - else - surf_offset += stage_state->prog_data->binding_table.plane_start[plane]; - - unsigned num_samplers = BITSET_LAST_BIT(prog->info.textures_used); - for (unsigned s = 0; s < num_samplers; s++) { - surf_offset[s] = 0; - - if (BITSET_TEST(prog->info.textures_used, s)) { - const unsigned unit = prog->SamplerUnits[s]; - const bool used_by_txf = BITSET_TEST(prog->info.textures_used_by_txf, s); - struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current; - struct brw_texture_object *iobj = brw_texture_object(obj); - - /* _NEW_TEXTURE */ - if (!obj) - continue; - - if ((prog->ShadowSamplers & (1 << s)) && !is_depth_texture(iobj)) { - /* A programming note for the sample_c message says: - * - * "The Surface Format of the associated surface must be - * indicated as supporting shadow mapping as indicated in the - * surface format table." - * - * Accessing non-depth textures via a sampler*Shadow type is - * undefined. GLSL 4.50 page 162 says: - * - * "If a shadow texture call is made to a sampler that does not - * represent a depth texture, then results are undefined." - * - * We give them a null surface (zeros) for undefined. We've seen - * GPU hangs with color buffers and sample_c, so we try and avoid - * those with this hack. - */ - emit_null_surface_state(brw, NULL, surf_offset + s); - } else { - brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather, - used_by_txf, plane); - } - } - } -} - - -/** - * Construct SURFACE_STATE objects for enabled textures. 
- */ -static void -brw_update_texture_surfaces(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* BRW_NEW_VERTEX_PROGRAM */ - struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX]; - - /* BRW_NEW_TESS_PROGRAMS */ - struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL]; - struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL]; - - /* BRW_NEW_GEOMETRY_PROGRAM */ - struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY]; - - /* BRW_NEW_FRAGMENT_PROGRAM */ - struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT]; - - /* _NEW_TEXTURE */ - update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0); - update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0); - update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0); - update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0); - update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0); - - /* Emit alternate set of surface state for gather. This - * allows the surface format to be overridden for only the - * gather4 messages. */ - if (devinfo->ver < 8) { - if (vs && vs->info.uses_texture_gather) - update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0); - if (tcs && tcs->info.uses_texture_gather) - update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0); - if (tes && tes->info.uses_texture_gather) - update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0); - if (gs && gs->info.uses_texture_gather) - update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0); - if (fs && fs->info.uses_texture_gather) - update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0); - } - - if (fs) { - update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1); - update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2); - } - - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; -} - -const struct brw_tracked_state brw_texture_surfaces = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_AUX_STATE | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_GS_PROG_DATA | - BRW_NEW_TESS_PROGRAMS | - BRW_NEW_TCS_PROG_DATA | - BRW_NEW_TES_PROG_DATA | - BRW_NEW_TEXTURE_BUFFER | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_VS_PROG_DATA, - }, - .emit = brw_update_texture_surfaces, -}; - -static void -brw_update_cs_texture_surfaces(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* BRW_NEW_COMPUTE_PROGRAM */ - struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE]; - - /* _NEW_TEXTURE */ - update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0); - - /* Emit alternate set of surface state for gather. This - * allows the surface format to be overridden for only the - * gather4 messages.
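The gather pass above lands in a separate binding-table section; update_stage_texture_surfaces() only changes the base offset it writes surface states at. A toy sketch of that slot selection follows; the struct is a hypothetical stand-in for the binding_table fields of brw_stage_prog_data:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical mirror of the binding-table fields consulted above. */
struct toy_binding_table {
   uint32_t plane_start[3];       /* normal sampling, one section per plane */
   uint32_t gather_texture_start; /* alternate section for gather4 on ver < 8 */
};

/* Binding-table slot for sampler `s`: the gather section is used when
 * emitting the alternate surface set, so only gather4 messages see the
 * overridden surface format.
 */
static uint32_t
texture_surface_slot(const struct toy_binding_table *bt,
                     bool for_gather, uint32_t plane, uint32_t s)
{
   uint32_t base = for_gather ? bt->gather_texture_start
                              : bt->plane_start[plane];
   return base + s;
}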
- */ - if (devinfo->ver < 8) { - if (cs && cs->info.uses_texture_gather) - update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0); - } - - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; -} - -const struct brw_tracked_state brw_cs_texture_surfaces = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_COMPUTE_PROGRAM | - BRW_NEW_AUX_STATE, - }, - .emit = brw_update_cs_texture_surfaces, -}; - -static void -upload_buffer_surface(struct brw_context *brw, - struct gl_buffer_binding *binding, - uint32_t *out_offset, - enum isl_format format, - unsigned reloc_flags) -{ - if (!binding->BufferObject) { - emit_null_surface_state(brw, NULL, out_offset); - } else { - ptrdiff_t size = binding->BufferObject->Size - binding->Offset; - if (!binding->AutomaticSize) - size = MIN2(size, binding->Size); - - if (size == 0) { - emit_null_surface_state(brw, NULL, out_offset); - return; - } - - struct brw_buffer_object *iobj = - brw_buffer_object(binding->BufferObject); - struct brw_bo *bo = - brw_bufferobj_buffer(brw, iobj, binding->Offset, size, - (reloc_flags & RELOC_WRITE) != 0); - - brw_emit_buffer_surface_state(brw, out_offset, bo, binding->Offset, - format, size, 1, reloc_flags); - } -} - -void -brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog, - struct brw_stage_state *stage_state, - struct brw_stage_prog_data *prog_data) -{ - struct gl_context *ctx = &brw->ctx; - - if (!prog || (prog->info.num_ubos == 0 && - prog->info.num_ssbos == 0 && - prog->info.num_abos == 0)) - return; - - if (prog->info.num_ubos) { - assert(prog_data->binding_table.ubo_start < BRW_MAX_SURFACES); - uint32_t *ubo_surf_offsets = - &stage_state->surf_offset[prog_data->binding_table.ubo_start]; - - for (int i = 0; i < prog->info.num_ubos; i++) { - struct gl_buffer_binding *binding = - &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding]; - upload_buffer_surface(brw, binding, &ubo_surf_offsets[i], - ISL_FORMAT_R32G32B32A32_FLOAT, 0); - } - } - - if (prog->info.num_ssbos || prog->info.num_abos) { - assert(prog_data->binding_table.ssbo_start < BRW_MAX_SURFACES); - uint32_t *ssbo_surf_offsets = - &stage_state->surf_offset[prog_data->binding_table.ssbo_start]; - uint32_t *abo_surf_offsets = ssbo_surf_offsets + prog->info.num_ssbos; - - for (int i = 0; i < prog->info.num_abos; i++) { - struct gl_buffer_binding *binding = - &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding]; - upload_buffer_surface(brw, binding, &abo_surf_offsets[i], - ISL_FORMAT_RAW, RELOC_WRITE); - } - - for (int i = 0; i < prog->info.num_ssbos; i++) { - struct gl_buffer_binding *binding = - &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding]; - - upload_buffer_surface(brw, binding, &ssbo_surf_offsets[i], - ISL_FORMAT_RAW, RELOC_WRITE); - } - } - - stage_state->push_constants_dirty = true; - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; -} - -static void -brw_upload_wm_ubo_surfaces(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* _NEW_PROGRAM */ - struct gl_program *prog = ctx->FragmentProgram._Current; - - /* BRW_NEW_FS_PROG_DATA */ - brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data); -} - -const struct brw_tracked_state brw_wm_ubo_surfaces = { - .dirty = { - .mesa = _NEW_PROGRAM, - .brw = BRW_NEW_BATCH | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_UNIFORM_BUFFER, - }, - .emit = brw_upload_wm_ubo_surfaces, -}; - -static void -brw_upload_cs_ubo_surfaces(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* _NEW_PROGRAM */ - 
struct gl_program *prog = - ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; - - /* BRW_NEW_CS_PROG_DATA */ - brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data); -} - -const struct brw_tracked_state brw_cs_ubo_surfaces = { - .dirty = { - .mesa = _NEW_PROGRAM, - .brw = BRW_NEW_BATCH | - BRW_NEW_CS_PROG_DATA | - BRW_NEW_UNIFORM_BUFFER, - }, - .emit = brw_upload_cs_ubo_surfaces, -}; - -static void -brw_upload_cs_image_surfaces(struct brw_context *brw) -{ - /* _NEW_PROGRAM */ - const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE]; - - if (cp) { - /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */ - brw_upload_image_surfaces(brw, cp, &brw->cs.base, - brw->cs.base.prog_data); - } -} - -const struct brw_tracked_state brw_cs_image_surfaces = { - .dirty = { - .mesa = _NEW_TEXTURE | _NEW_PROGRAM, - .brw = BRW_NEW_BATCH | - BRW_NEW_CS_PROG_DATA | - BRW_NEW_AUX_STATE | - BRW_NEW_IMAGE_UNITS - }, - .emit = brw_upload_cs_image_surfaces, -}; - -static uint32_t -get_image_format(struct brw_context *brw, mesa_format format, GLenum access) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - enum isl_format hw_format = brw_isl_format_for_mesa_format(format); - if (access == GL_WRITE_ONLY || access == GL_NONE) { - return hw_format; - } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) { - /* Typed surface reads support a very limited subset of the shader - * image formats. Translate it into the closest format the - * hardware supports. - */ - return isl_lower_storage_image_format(devinfo, hw_format); - } else { - /* The hardware doesn't actually support a typed format that we can use - * so we have to fall back to untyped read/write messages. - */ - return ISL_FORMAT_RAW; - } -} - -static void -update_default_image_param(struct brw_context *brw, - struct gl_image_unit *u, - struct brw_image_param *param) -{ - memset(param, 0, sizeof(*param)); - /* Set the swizzling shifts to all-ones to effectively disable swizzling -- - * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more - * detailed explanation of these parameters. - */ - param->swizzling[0] = 0xff; - param->swizzling[1] = 0xff; -} - -static void -update_buffer_image_param(struct brw_context *brw, - struct gl_image_unit *u, - struct brw_image_param *param) -{ - const unsigned size = buffer_texture_range_size(brw, u->TexObj); - update_default_image_param(brw, u, param); - - param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat); - param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat); -} - -static void -update_image_surface(struct brw_context *brw, - struct gl_image_unit *u, - GLenum access, - uint32_t *surf_offset, - struct brw_image_param *param) -{ - if (_mesa_is_image_unit_valid(&brw->ctx, u)) { - struct gl_texture_object *obj = u->TexObj; - const unsigned format = get_image_format(brw, u->_ActualFormat, access); - const bool written = (access != GL_READ_ONLY && access != GL_NONE); - - if (obj->Target == GL_TEXTURE_BUFFER) { - const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 : - _mesa_get_format_bytes(u->_ActualFormat)); - const unsigned buffer_size = buffer_texture_range_size(brw, obj); - struct brw_bo *const bo = !obj->BufferObject ? NULL : - brw_bufferobj_buffer(brw, brw_buffer_object(obj->BufferObject), - obj->BufferOffset, buffer_size, written); - - brw_emit_buffer_surface_state( - brw, surf_offset, bo, obj->BufferOffset, - format, buffer_size, texel_size, - written ? 
RELOC_WRITE : 0); - - update_buffer_image_param(brw, u, param); - - } else { - struct brw_texture_object *intel_obj = brw_texture_object(obj); - struct brw_mipmap_tree *mt = intel_obj->mt; - - unsigned base_layer, num_layers; - if (u->Layered) { - if (obj->Target == GL_TEXTURE_3D) { - base_layer = 0; - num_layers = minify(mt->surf.logical_level0_px.depth, u->Level); - } else { - assert(obj->Immutable || obj->Attrib.MinLayer == 0); - base_layer = obj->Attrib.MinLayer; - num_layers = obj->Immutable ? - obj->Attrib.NumLayers : - mt->surf.logical_level0_px.array_len; - } - } else { - base_layer = obj->Attrib.MinLayer + u->_Layer; - num_layers = 1; - } - - struct isl_view view = { - .format = format, - .base_level = obj->Attrib.MinLevel + u->Level, - .levels = 1, - .base_array_layer = base_layer, - .array_len = num_layers, - .swizzle = ISL_SWIZZLE_IDENTITY, - .usage = ISL_SURF_USAGE_STORAGE_BIT, - }; - - if (format == ISL_FORMAT_RAW) { - brw_emit_buffer_surface_state( - brw, surf_offset, mt->bo, mt->offset, - format, mt->bo->size - mt->offset, 1 /* pitch */, - written ? RELOC_WRITE : 0); - - } else { - const int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; - assert(!brw_miptree_has_color_unresolved(mt, - view.base_level, 1, - view.base_array_layer, - view.array_len)); - brw_emit_surface_state(brw, mt, mt->target, view, - ISL_AUX_USAGE_NONE, - surf_offset, surf_index, - written ? RELOC_WRITE : 0); - } - - isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view); - } - - } else { - emit_null_surface_state(brw, NULL, surf_offset); - update_default_image_param(brw, u, param); - } -} - -void -brw_upload_image_surfaces(struct brw_context *brw, - const struct gl_program *prog, - struct brw_stage_state *stage_state, - struct brw_stage_prog_data *prog_data) -{ - assert(prog); - struct gl_context *ctx = &brw->ctx; - - if (prog->info.num_images) { - for (unsigned i = 0; i < prog->info.num_images; i++) { - struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]]; - const unsigned surf_idx = prog_data->binding_table.image_start + i; - - update_image_surface(brw, u, prog->sh.ImageAccess[i], - &stage_state->surf_offset[surf_idx], - &stage_state->image_param[i]); - } - - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; - /* This may have changed the image metadata dependent on the context - * image unit state and passed to the program as uniforms, make sure - * that push and pull constants are reuploaded. 
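The layered-vs-non-layered selection above is easy to get wrong, so here is the same decision distilled into a standalone sketch. The struct is a hypothetical stand-in for the handful of gl_image_unit and gl_texture_object fields consulted, and minify() halves a dimension per mip level as in Mesa:

#include <stdbool.h>
#include <stdint.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

/* minify() as used above: dimension of mip `level`, never below 1. */
static uint32_t
minify(uint32_t dim, uint32_t level)
{
   return MAX2(dim >> level, 1);
}

/* Hypothetical stand-in for the fields read by update_image_surface(). */
struct toy_image_binding {
   bool     layered;         /* gl_image_unit::Layered */
   bool     is_3d;           /* target == GL_TEXTURE_3D */
   bool     immutable;       /* texture view (ARB_texture_view) */
   uint32_t level;           /* gl_image_unit::Level */
   uint32_t layer;           /* gl_image_unit::_Layer */
   uint32_t min_layer;       /* gl_texture_object::Attrib.MinLayer */
   uint32_t num_view_layers; /* gl_texture_object::Attrib.NumLayers */
   uint32_t depth0;          /* level-0 depth for 3D textures */
   uint32_t array_len;       /* miptree array length */
};

static void
image_layer_range(const struct toy_image_binding *b,
                  uint32_t *base_layer, uint32_t *num_layers)
{
   if (b->layered) {
      if (b->is_3d) {
         /* Layered 3D binding: all slices of the bound level. */
         *base_layer = 0;
         *num_layers = minify(b->depth0, b->level);
      } else {
         /* Layered array binding: respect the view's MinLayer/NumLayers. */
         *base_layer = b->min_layer;
         *num_layers = b->immutable ? b->num_view_layers : b->array_len;
      }
   } else {
      /* Single-layer binding. */
      *base_layer = b->min_layer + b->layer;
      *num_layers = 1;
   }
}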
- */ - brw->NewGLState |= _NEW_PROGRAM_CONSTANTS; - } -} - -static void -brw_upload_wm_image_surfaces(struct brw_context *brw) -{ - /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct gl_program *wm = brw->programs[MESA_SHADER_FRAGMENT]; - - if (wm) { - /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */ - brw_upload_image_surfaces(brw, wm, &brw->wm.base, - brw->wm.base.prog_data); - } -} - -const struct brw_tracked_state brw_wm_image_surfaces = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_AUX_STATE | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_IMAGE_UNITS - }, - .emit = brw_upload_wm_image_surfaces, -}; - -static void -brw_upload_cs_work_groups_surface(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* _NEW_PROGRAM */ - struct gl_program *prog = - ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; - /* BRW_NEW_CS_PROG_DATA */ - const struct brw_cs_prog_data *cs_prog_data = - brw_cs_prog_data(brw->cs.base.prog_data); - - if (prog && cs_prog_data->uses_num_work_groups) { - const unsigned surf_idx = - cs_prog_data->binding_table.work_groups_start; - uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx]; - struct brw_bo *bo; - uint32_t bo_offset; - - if (brw->compute.num_work_groups_bo == NULL) { - bo = NULL; - brw_upload_data(&brw->upload, - (void *)brw->compute.num_work_groups, - 3 * sizeof(GLuint), - sizeof(GLuint), - &bo, - &bo_offset); - } else { - bo = brw->compute.num_work_groups_bo; - bo_offset = brw->compute.num_work_groups_offset; - } - - brw_emit_buffer_surface_state(brw, surf_offset, - bo, bo_offset, - ISL_FORMAT_RAW, - 3 * sizeof(GLuint), 1, - RELOC_WRITE); - - /* The state buffer now holds a reference to our upload, drop ours. */ - if (bo != brw->compute.num_work_groups_bo) - brw_bo_unreference(bo); - - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; - } -} - -const struct brw_tracked_state brw_cs_work_groups_surface = { - .dirty = { - .brw = BRW_NEW_CS_PROG_DATA | - BRW_NEW_CS_WORK_GROUPS - }, - .emit = brw_upload_cs_work_groups_surface, -}; diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c deleted file mode 100644 index aed53d9..0000000 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ /dev/null @@ -1,412 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
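brw_upload_cs_work_groups_surface() just above has two possible sources for the three dwords backing gl_NumWorkGroups: the application's indirect-dispatch buffer, or a fresh CPU-side upload for direct dispatches. A condensed, self-contained sketch of that choice, using toy types; upload_dwords() stands in for brw_upload_data():

#include <stdint.h>
#include <string.h>

struct toy_bo { uint8_t storage[64]; };

static struct toy_bo upload_pool;

/* Toy stand-in for brw_upload_data(): copy the data somewhere the GPU
 * could read it and report where it landed.
 */
static void
upload_dwords(const uint32_t *data, uint32_t bytes,
              struct toy_bo **bo_out, uint32_t *offset_out)
{
   memcpy(upload_pool.storage, data, bytes);
   *bo_out = &upload_pool;
   *offset_out = 0;
}

/* Direct dispatches upload the CPU-side counts; indirect dispatches
 * already have them in a buffer object, which the RAW surface can
 * simply point at.
 */
static void
num_work_groups_source(struct toy_bo *indirect_bo, uint32_t indirect_offset,
                       const uint32_t counts[3],
                       struct toy_bo **bo, uint32_t *offset)
{
   if (indirect_bo == NULL) {
      upload_dwords(counts, 3 * sizeof(uint32_t), bo, offset);
   } else {
      *bo = indirect_bo;
      *offset = indirect_offset;
   }
}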
- */ - -#include - -#include "brw_batch.h" -#include "brw_mipmap_tree.h" -#include "brw_fbo.h" - -#include "brw_context.h" -#include "brw_state.h" - -#include "blorp/blorp_genX_exec.h" - -#if GFX_VER <= 5 -#include "gfx4_blorp_exec.h" -#endif - -#include "brw_blorp.h" - -static void blorp_measure_start(struct blorp_batch *batch, - const struct blorp_params *params) { } - -static void * -blorp_emit_dwords(struct blorp_batch *batch, unsigned n) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - brw_batch_begin(brw, n); - uint32_t *map = brw->batch.map_next; - brw->batch.map_next += n; - brw_batch_advance(brw); - return map; -} - -static uint64_t -blorp_emit_reloc(struct blorp_batch *batch, - void *location, struct blorp_address address, uint32_t delta) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - uint32_t offset; - - if (GFX_VER < 6 && brw_ptr_in_state_buffer(&brw->batch, location)) { - offset = (char *)location - (char *)brw->batch.state.map; - return brw_state_reloc(&brw->batch, offset, - address.buffer, address.offset + delta, - address.reloc_flags); - } - - assert(!brw_ptr_in_state_buffer(&brw->batch, location)); - - offset = (char *)location - (char *)brw->batch.batch.map; - return brw_batch_reloc(&brw->batch, offset, - address.buffer, address.offset + delta, - address.reloc_flags); -} - -static void -blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, - struct blorp_address address, uint32_t delta) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - struct brw_bo *bo = address.buffer; - - uint64_t reloc_val = - brw_state_reloc(&brw->batch, ss_offset, bo, address.offset + delta, - address.reloc_flags); - - void *reloc_ptr = (void *)brw->batch.state.map + ss_offset; -#if GFX_VER >= 8 - *(uint64_t *)reloc_ptr = reloc_val; -#else - *(uint32_t *)reloc_ptr = reloc_val; -#endif -} - -static uint64_t -blorp_get_surface_address(UNUSED struct blorp_batch *blorp_batch, - UNUSED struct blorp_address address) -{ - /* We'll let blorp_surface_reloc write the address. */ - return 0ull; -} - -#if GFX_VER >= 7 && GFX_VER < 10 -static struct blorp_address -blorp_get_surface_base_address(struct blorp_batch *batch) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - return (struct blorp_address) { - .buffer = brw->batch.state.bo, - .offset = 0, - }; -} -#endif - -static void * -blorp_alloc_dynamic_state(struct blorp_batch *batch, - uint32_t size, - uint32_t alignment, - uint32_t *offset) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - return brw_state_batch(brw, size, alignment, offset); -} - -UNUSED static void * -blorp_alloc_general_state(struct blorp_batch *blorp_batch, - uint32_t size, - uint32_t alignment, - uint32_t *offset) -{ - /* Use dynamic state range for general state on i965. 
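blorp_surface_reloc() above writes the relocation value straight into the surface state map, and the store width has to track the generation's address size. A freestanding illustration of that pattern, assuming GFX_VER is a per-generation compile-time macro as in genxml builds (the fallback define below is only so the sketch compiles on its own):

#include <stdint.h>
#include <string.h>

#ifndef GFX_VER
#define GFX_VER 8 /* assumption: compiled once per generation, as genxml does */
#endif

/* Store a relocation value into surface state: Gfx8+ surface states carry
 * 64-bit addresses, older generations 32-bit ones. memcpy avoids making
 * alignment assumptions about the state map.
 */
static void
write_reloc_value(void *state_map, uint32_t ss_offset, uint64_t reloc_val)
{
   void *dst = (char *)state_map + ss_offset;
#if GFX_VER >= 8
   memcpy(dst, &reloc_val, sizeof(uint64_t));
#else
   uint32_t val32 = (uint32_t)reloc_val;
   memcpy(dst, &val32, sizeof(uint32_t));
#endif
}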
*/ - return blorp_alloc_dynamic_state(blorp_batch, size, alignment, offset); -} - -static void -blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries, - unsigned state_size, unsigned state_alignment, - uint32_t *bt_offset, uint32_t *surface_offsets, - void **surface_maps) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - uint32_t *bt_map = brw_state_batch(brw, - num_entries * sizeof(uint32_t), 32, - bt_offset); - - for (unsigned i = 0; i < num_entries; i++) { - surface_maps[i] = brw_state_batch(brw, - state_size, state_alignment, - &(surface_offsets)[i]); - bt_map[i] = surface_offsets[i]; - } -} - -static void * -blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size, - struct blorp_address *addr) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - /* From the Skylake PRM, 3DSTATE_VERTEX_BUFFERS: - * - * "The VF cache needs to be invalidated before binding and then using - * Vertex Buffers that overlap with any previously bound Vertex Buffer - * (at a 64B granularity) since the last invalidation. A VF cache - * invalidate is performed by setting the "VF Cache Invalidation Enable" - * bit in PIPE_CONTROL." - * - * This restriction first appears in the Skylake PRM but the internal docs - * also list it as being an issue on Broadwell. In order to avoid this - * problem, we align all vertex buffer allocations to 64 bytes. - */ - uint32_t offset; - void *data = brw_state_batch(brw, size, 64, &offset); - - *addr = (struct blorp_address) { - .buffer = brw->batch.state.bo, - .offset = offset, - - /* The VF cache designers apparently cut corners, and made the cache - * only consider the bottom 32 bits of memory addresses. If you happen - * to have two vertex buffers which get placed exactly 4 GiB apart and - * use them in back-to-back draw calls, you can get collisions. To work - * around this problem, we restrict vertex buffers to the low 32 bits of - * the address space. - */ - .reloc_flags = RELOC_32BIT, - - .mocs = brw_mocs(&brw->isl_dev, brw->batch.state.bo), - }; - - return data; -} - -/** - * See vf_invalidate_for_vb_48b_transitions in genX_state_upload.c. - */ -static void -blorp_vf_invalidate_for_vb_48b_transitions(UNUSED struct blorp_batch *batch, - UNUSED const struct blorp_address *addrs, - UNUSED uint32_t *sizes, - UNUSED unsigned num_vbs) -{ -#if GFX_VER >= 8 && GFX_VER < 11 - struct brw_context *brw = batch->driver_batch; - bool need_invalidate = false; - - for (unsigned i = 0; i < num_vbs; i++) { - struct brw_bo *bo = addrs[i].buffer; - uint16_t high_bits = - bo && (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32u : 0; - - if (high_bits != brw->vb.last_bo_high_bits[i]) { - need_invalidate = true; - brw->vb.last_bo_high_bits[i] = high_bits; - } - } - - if (need_invalidate) { - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_CS_STALL); - } -#endif -} - -UNUSED static struct blorp_address -blorp_get_workaround_address(struct blorp_batch *batch) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - return (struct blorp_address) { - .buffer = brw->workaround_bo, - .offset = brw->workaround_bo_offset, - }; -} - -static void -blorp_flush_range(UNUSED struct blorp_batch *batch, UNUSED void *start, - UNUSED size_t size) -{ - /* All allocated states come from the batch which we will flush before we - * submit it. 
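The 48-bit-address workaround above hinges on one observation: the VF cache tags addresses by their low 32 bits only, so two vertex buffers placed exactly 4 GiB apart alias. A minimal sketch of the bookkeeping, with last_high_bits standing in for brw->vb.last_bo_high_bits (the array size here is illustrative):

#include <stdbool.h>
#include <stdint.h>

#define TOY_MAX_VBS 33

static uint16_t last_high_bits[TOY_MAX_VBS];

/* Returns true when any vertex buffer moved to a different 4 GiB bin
 * since the last check, i.e. when a VF cache invalidate is needed
 * because the cache only compares the low 32 address bits.
 */
static bool
vb_bins_changed(const uint64_t *addresses, unsigned num_vbs)
{
   bool need_invalidate = false;
   for (unsigned i = 0; i < num_vbs; i++) {
      uint16_t high_bits = addresses[i] >> 32;
      if (high_bits != last_high_bits[i]) {
         need_invalidate = true;
         last_high_bits[i] = high_bits;
      }
   }
   return need_invalidate;
}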
There's nothing for us to do here. - */ -} - -#if GFX_VER >= 7 -static const struct intel_l3_config * -blorp_get_l3_config(struct blorp_batch *batch) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - return brw->l3.config; -} -#else /* GFX_VER < 7 */ -static void -blorp_emit_urb_config(struct blorp_batch *batch, - unsigned vs_entry_size, - UNUSED unsigned sf_entry_size) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - -#if GFX_VER == 6 - gfx6_upload_urb(brw, vs_entry_size, false, 0); -#else - /* We calculate it now and emit later. */ - brw_calculate_urb_fence(brw, 0, vs_entry_size, sf_entry_size); -#endif -} -#endif - -void -genX(blorp_exec)(struct blorp_batch *batch, - const struct blorp_params *params) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - struct gl_context *ctx = &brw->ctx; - bool check_aperture_failed_once = false; - -#if GFX_VER >= 11 - /* The PIPE_CONTROL command description says: - * - * "Whenever a Binding Table Index (BTI) used by a Render Taget Message - * points to a different RENDER_SURFACE_STATE, SW must issue a Render - * Target Cache Flush by enabling this bit. When render target flush - * is set due to new association of BTI, PS Scoreboard Stall bit must - * be set in this packet." - */ - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_STALL_AT_SCOREBOARD); -#endif - - /* Flush the sampler and render caches. We definitely need to flush the - * sampler cache so that we get updated contents from the render cache for - * the glBlitFramebuffer() source. Also, we are sometimes warned in the - * docs to flush the cache between reinterpretations of the same surface - * data with different formats, which blorp does for stencil and depth - * data. - */ - if (params->src.enabled) - brw_cache_flush_for_read(brw, params->src.addr.buffer); - if (params->dst.enabled) { - brw_cache_flush_for_render(brw, params->dst.addr.buffer, - params->dst.view.format, - params->dst.aux_usage); - } - if (params->depth.enabled) - brw_cache_flush_for_depth(brw, params->depth.addr.buffer); - if (params->stencil.enabled) - brw_cache_flush_for_depth(brw, params->stencil.addr.buffer); - - brw_select_pipeline(brw, BRW_RENDER_PIPELINE); - brw_emit_l3_state(brw); - -retry: - brw_batch_require_space(brw, 1400); - brw_require_statebuffer_space(brw, 600); - brw_batch_save_state(brw); - check_aperture_failed_once |= brw_batch_saved_state_is_empty(brw); - brw->batch.no_wrap = true; - -#if GFX_VER == 6 - /* Emit workaround flushes when we switch from drawing to blorping. */ - brw_emit_post_sync_nonzero_flush(brw); -#endif - - brw->vtbl.emit_state_base_address(brw); - -#if GFX_VER >= 8 - gfx7_l3_state.emit(brw); -#endif - -#if GFX_VER >= 6 - brw_emit_depth_stall_flushes(brw); -#endif - -#if GFX_VER == 8 - gfx8_write_pma_stall_bits(brw, 0); -#endif - - const unsigned scale = params->fast_clear_op ? 
UINT_MAX : 1; - if (brw->current_hash_scale != scale) { - brw_emit_hashing_mode(brw, params->x1 - params->x0, - params->y1 - params->y0, scale); - } - - blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { - rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1; - rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1; - } - - blorp_exec(batch, params); - - brw->batch.no_wrap = false; - - /* Check if the blorp op we just did would make our batch likely to fail to - * map all the BOs into the GPU at batch exec time later. If so, flush the - * batch and try again with nothing else in the batch. - */ - if (!brw_batch_has_aperture_space(brw, 0)) { - if (!check_aperture_failed_once) { - check_aperture_failed_once = true; - brw_batch_reset_to_saved(brw); - brw_batch_flush(brw); - goto retry; - } else { - int ret = brw_batch_flush(brw); - WARN_ONCE(ret == -ENOSPC, - "i965: blorp emit exceeded available aperture space\n"); - } - } - - if (unlikely(brw->always_flush_batch)) - brw_batch_flush(brw); - - /* We've smashed all state compared to what the normal 3D pipeline - * rendering tracks for GL. - */ - brw->ctx.NewDriverState |= BRW_NEW_BLORP; - brw->no_depth_or_stencil = !params->depth.enabled && - !params->stencil.enabled; - brw->ib.index_size = -1; - brw->urb.vsize = 0; - brw->urb.gs_present = false; - brw->urb.gsize = 0; - brw->urb.tess_present = false; - brw->urb.hsize = 0; - brw->urb.dsize = 0; - - if (params->dst.enabled) { - brw_render_cache_add_bo(brw, params->dst.addr.buffer, - params->dst.view.format, - params->dst.aux_usage); - } - if (params->depth.enabled) - brw_depth_cache_add_bo(brw, params->depth.addr.buffer); - if (params->stencil.enabled) - brw_depth_cache_add_bo(brw, params->stencil.addr.buffer); -} diff --git a/src/mesa/drivers/dri/i965/genX_boilerplate.h b/src/mesa/drivers/dri/i965/genX_boilerplate.h deleted file mode 100644 index 20df05d..0000000 --- a/src/mesa/drivers/dri/i965/genX_boilerplate.h +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright © 2018 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef GENX_BOILERPLATE_H -#define GENX_BOILERPLATE_H - -#include - -#include "genxml/gen_macros.h" - -#include "brw_context.h" -#include "brw_batch.h" - -UNUSED static void * -emit_dwords(struct brw_context *brw, unsigned n) -{ - brw_batch_begin(brw, n); - uint32_t *map = brw->batch.map_next; - brw->batch.map_next += n; - brw_batch_advance(brw); - return map; -} - -struct brw_address { - struct brw_bo *bo; - unsigned reloc_flags; - uint32_t offset; -}; - -#define __gen_address_type struct brw_address -#define __gen_user_data struct brw_context - -static uint64_t -__gen_combine_address(struct brw_context *brw, void *location, - struct brw_address address, uint32_t delta) -{ - struct brw_batch *batch = &brw->batch; - uint32_t offset; - - if (address.bo == NULL) { - return address.offset + delta; - } else { - if (GFX_VER < 6 && brw_ptr_in_state_buffer(batch, location)) { - offset = (char *) location - (char *) brw->batch.state.map; - return brw_state_reloc(batch, offset, address.bo, - address.offset + delta, - address.reloc_flags); - } - - assert(!brw_ptr_in_state_buffer(batch, location)); - - offset = (char *) location - (char *) brw->batch.batch.map; - return brw_batch_reloc(batch, offset, address.bo, - address.offset + delta, - address.reloc_flags); - } -} - -UNUSED static struct brw_address -rw_bo(struct brw_bo *bo, uint32_t offset) -{ - return (struct brw_address) { - .bo = bo, - .offset = offset, - .reloc_flags = RELOC_WRITE, - }; -} - -UNUSED static struct brw_address -ro_bo(struct brw_bo *bo, uint32_t offset) -{ - return (struct brw_address) { - .bo = bo, - .offset = offset, - }; -} - -UNUSED static struct brw_address -rw_32_bo(struct brw_bo *bo, uint32_t offset) -{ - return (struct brw_address) { - .bo = bo, - .offset = offset, - .reloc_flags = RELOC_WRITE | RELOC_32BIT, - }; -} - -UNUSED static struct brw_address -ro_32_bo(struct brw_bo *bo, uint32_t offset) -{ - return (struct brw_address) { - .bo = bo, - .offset = offset, - .reloc_flags = RELOC_32BIT, - }; -} - -UNUSED static struct brw_address -ggtt_bo(struct brw_bo *bo, uint32_t offset) -{ - return (struct brw_address) { - .bo = bo, - .offset = offset, - .reloc_flags = RELOC_WRITE | RELOC_NEEDS_GGTT, - }; -} - -#include "genxml/genX_pack.h" - -#define _brw_cmd_length(cmd) cmd ## _length -#define _brw_cmd_length_bias(cmd) cmd ## _length_bias -#define _brw_cmd_header(cmd) cmd ## _header -#define _brw_cmd_pack(cmd) cmd ## _pack - -#define brw_batch_emit(brw, cmd, name) \ - for (struct cmd name = { _brw_cmd_header(cmd) }, \ - *_dst = emit_dwords(brw, _brw_cmd_length(cmd)); \ - __builtin_expect(_dst != NULL, 1); \ - _brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \ - _dst = NULL) - -#define brw_batch_emitn(brw, cmd, n, ...) 
({ \ - uint32_t *_dw = emit_dwords(brw, n); \ - struct cmd template = { \ - _brw_cmd_header(cmd), \ - .DWordLength = n - _brw_cmd_length_bias(cmd), \ - __VA_ARGS__ \ - }; \ - _brw_cmd_pack(cmd)(brw, _dw, &template); \ - _dw + 1; /* Array starts at dw[1] */ \ - }) - -#define brw_state_emit(brw, cmd, align, offset, name) \ - for (struct cmd name = {}, \ - *_dst = brw_state_batch(brw, _brw_cmd_length(cmd) * 4, \ - align, offset); \ - __builtin_expect(_dst != NULL, 1); \ - _brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \ - _dst = NULL) - -#endif diff --git a/src/mesa/drivers/dri/i965/genX_pipe_control.c b/src/mesa/drivers/dri/i965/genX_pipe_control.c deleted file mode 100644 index 880b7c7..0000000 --- a/src/mesa/drivers/dri/i965/genX_pipe_control.c +++ /dev/null @@ -1,514 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "genX_boilerplate.h" -#include "brw_defines.h" -#include "brw_state.h" - -static unsigned -flags_to_post_sync_op(uint32_t flags) -{ - if (flags & PIPE_CONTROL_WRITE_IMMEDIATE) - return WriteImmediateData; - - if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) - return WritePSDepthCount; - - if (flags & PIPE_CONTROL_WRITE_TIMESTAMP) - return WriteTimestamp; - - return 0; -} - -/** - * Do the given flags have a Post Sync or LRI Post Sync operation? - */ -static enum pipe_control_flags -get_post_sync_flags(enum pipe_control_flags flags) -{ - flags &= PIPE_CONTROL_WRITE_IMMEDIATE | - PIPE_CONTROL_WRITE_DEPTH_COUNT | - PIPE_CONTROL_WRITE_TIMESTAMP | - PIPE_CONTROL_LRI_POST_SYNC_OP; - - /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with - * "LRI Post Sync Operation". So more than one bit set would be illegal. - */ - assert(util_bitcount(flags) <= 1); - - return flags; -} - -#define IS_COMPUTE_PIPELINE(brw) \ - (GFX_VER >= 7 && brw->last_pipeline == BRW_COMPUTE_PIPELINE) - -/* Closed interval - GFX_VER \in [x, y] */ -#define IS_GFX_VER_BETWEEN(x, y) (GFX_VER >= x && GFX_VER <= y) -#define IS_GFX_VERx10_BETWEEN(x, y) \ - (GFX_VERx10 >= x && GFX_VERx10 <= y) - -/** - * Emit a series of PIPE_CONTROL commands, taking into account any - * workarounds necessary to actually accomplish the caller's request. - * - * Unless otherwise noted, spec quotations in this function come from: - * - * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming - * Restrictions for PIPE_CONTROL. - * - * You should not use this function directly. 
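The pack-macro pattern defined in genX_boilerplate.h above is a for loop that runs its body exactly once over a stack-allocated struct, then packs the struct into the batch as the loop exits. A self-contained miniature of the same trick, with toy names in place of the genxml types and pack functions:

#include <stdio.h>

struct toy_cmd { unsigned opcode, field; };

static void
toy_pack(const struct toy_cmd *cmd)
{
   printf("packing opcode %u, field %u\n", cmd->opcode, cmd->field);
}

/* Miniature of brw_batch_emit(): declare the command with its defaults,
 * run the body once so the caller can set fields, pack on the way out.
 */
#define toy_emit(name)                                          \
   for (struct toy_cmd name = { .opcode = 42 }, *_once = &name; \
        _once != NULL;                                          \
        toy_pack(&name), _once = NULL)

int main(void)
{
   toy_emit(cmd) {
      cmd.field = 7; /* body runs exactly once; defaults come from the initializer */
   }
   return 0;
}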
Use the helpers in - * brw_pipe_control.c instead, which may split the pipe control further. - */ -void -genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags, - struct brw_bo *bo, uint32_t offset, uint64_t imm) -{ - UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo; - enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags); - enum pipe_control_flags non_lri_post_sync_flags = - post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP; - - /* Recursive PIPE_CONTROL workarounds -------------------------------- - * (http://knowyourmeme.com/memes/xzibit-yo-dawg) - * - * We do these first because we want to look at the original operation, - * rather than any workarounds we set. - */ - if (GFX_VER == 6 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) { - /* Hardware workaround: SNB B-Spec says: - * - * "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush - * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is - * required." - */ - brw_emit_post_sync_nonzero_flush(brw); - } - - if (GFX_VER == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) { - /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description - * lists several workarounds: - * - * "Project: SKL, KBL, BXT - * - * If the VF Cache Invalidation Enable is set to a 1 in a - * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields - * sets to 0, with the VF Cache Invalidation Enable set to 0 - * needs to be sent prior to the PIPE_CONTROL with VF Cache - * Invalidation Enable set to a 1." - */ - genX(emit_raw_pipe_control)(brw, 0, NULL, 0, 0); - } - - if (GFX_VER == 9 && IS_COMPUTE_PIPELINE(brw) && post_sync_flags) { - /* Project: SKL / Argument: LRI Post Sync Operation [23] - * - * "PIPECONTROL command with “Command Streamer Stall Enable” must be - * programmed prior to programming a PIPECONTROL command with "LRI - * Post Sync Operation" in GPGPU mode of operation (i.e when - * PIPELINE_SELECT command is set to GPGPU mode of operation)." - * - * The same text exists a few rows below for Post Sync Op. - */ - genX(emit_raw_pipe_control)(brw, PIPE_CONTROL_CS_STALL, NULL, 0, 0); - } - - /* "Flush Types" workarounds --------------------------------------------- - * We do these now because they may add post-sync operations or CS stalls. - */ - - if (IS_GFX_VER_BETWEEN(8, 10) && - (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) { - /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate - * - * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or - * 'Write PS Depth Count' or 'Write Timestamp'." - */ - if (!bo) { - flags |= PIPE_CONTROL_WRITE_IMMEDIATE; - post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE; - non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE; - bo = brw->workaround_bo; - offset = brw->workaround_bo_offset; - } - } - - if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_STALL)) { - /* Project: PRE-HSW / Argument: Depth Stall - * - * "The following bits must be clear: - * - Render Target Cache Flush Enable ([12] of DW1) - * - Depth Cache Flush Enable ([0] of DW1)" - */ - assert(!(flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH))); - } - - if (GFX_VER >= 6 && (flags & PIPE_CONTROL_DEPTH_STALL)) { - /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable): - * - * "This bit must be DISABLED for operations other than writing - * PS_DEPTH_COUNT." - * - * This seems like nonsense. An Ivybridge workaround requires us to - * emit a PIPE_CONTROL with a depth stall and write immediate post-sync - * operation. 
Gfx8+ requires us to emit depth stalls and depth cache - * flushes together. So, it's hard to imagine this means anything other - * than "we originally intended this to be used for PS_DEPTH_COUNT". - * - * We ignore the supposed restriction and do nothing. - */ - } - - if (GFX_VERx10 < 75 && (flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) { - /* Project: PRE-HSW / Argument: Depth Cache Flush - * - * "Depth Stall must be clear ([13] of DW1)." - */ - assert(!(flags & PIPE_CONTROL_DEPTH_STALL)); - } - - if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_STALL_AT_SCOREBOARD)) { - /* From the PIPE_CONTROL instruction table, bit 12 and bit 1: - * - * "This bit must be DISABLED for End-of-pipe (Read) fences, - * PS_DEPTH_COUNT or TIMESTAMP queries." - * - * TODO: Implement end-of-pipe checking. - */ - assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT | - PIPE_CONTROL_WRITE_TIMESTAMP))); - } - - if (GFX_VER < 11 && (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD)) { - /* From the PIPE_CONTROL instruction table, bit 1: - * - * "This bit is ignored if Depth Stall Enable is set. - * Further, the render cache is not flushed even if Write Cache - * Flush Enable bit is set." - * - * We assert that the caller doesn't do this combination, to try and - * prevent mistakes. It shouldn't hurt the GPU, though. - * - * We skip this check on Gfx11+ as the "Stall and Pixel Scoreboard" - * and "Render Target Flush" combo is explicitly required for BTI - * update workarounds. - */ - assert(!(flags & (PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_RENDER_TARGET_FLUSH))); - } - - /* PIPE_CONTROL page workarounds ------------------------------------- */ - - if (IS_GFX_VER_BETWEEN(7, 8) && - (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) { - /* From the PIPE_CONTROL page itself: - * - * "IVB, HSW, BDW - * Restriction: Pipe_control with CS-stall bit set must be issued - * before a pipe-control command that has the State Cache - * Invalidate bit set." - */ - flags |= PIPE_CONTROL_CS_STALL; - } - - if (GFX_VERx10 == 75) { - /* From the PIPE_CONTROL page itself: - * - * "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation: - * Prior to programming a PIPECONTROL command with any of the RO - * cache invalidation bit set, program a PIPECONTROL flush command - * with “CS stall” bit and “HDC Flush” bit set." - * - * TODO: Actually implement this. What's an HDC Flush? - */ - } - - if (flags & PIPE_CONTROL_FLUSH_LLC) { - /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC): - * - * "Project: ALL - * SW must always program Post-Sync Operation to "Write Immediate - * Data" when Flush LLC is set." - * - * For now, we just require the caller to do it. - */ - assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE); - } - - /* "Post-Sync Operation" workarounds -------------------------------- */ - - /* Project: All / Argument: Global Snapshot Count Reset [19] - * - * "This bit must not be exercised on any product. - * Requires stall bit ([20] of DW1) set." - * - * We don't use this, so we just assert that it isn't used. The - * PIPE_CONTROL instruction page indicates that they intended this - * as a debug feature and don't think it is useful in production, - * but it may actually be usable, should we ever want to. 
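Several of the fixups in this function follow the same shape as the Gfx8-10 VF-invalidate rule earlier: if a bit demands a real post-sync write and the caller didn't supply one, retarget the packet at the scratch workaround BO. Distilled into a sketch with illustrative flag values and a toy BO type:

#include <stdint.h>

#define TOY_PC_VF_CACHE_INVALIDATE (1u << 0)
#define TOY_PC_WRITE_IMMEDIATE     (1u << 1)

struct toy_bo;

/* Condensed from the workaround above: a VF cache invalidate must carry
 * a post-sync write, so point a Write Immediate at a scratch BO when the
 * caller didn't request any write of their own.
 */
static uint32_t
ensure_vf_invalidate_post_sync(uint32_t flags,
                               struct toy_bo **bo, uint32_t *offset,
                               struct toy_bo *workaround_bo,
                               uint32_t workaround_offset)
{
   if ((flags & TOY_PC_VF_CACHE_INVALIDATE) && *bo == NULL) {
      flags |= TOY_PC_WRITE_IMMEDIATE;
      *bo = workaround_bo;
      *offset = workaround_offset;
   }
   return flags;
}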
- */ - assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0); - - if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR | - PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) { - /* Project: All / Arguments: - * - * - Generic Media State Clear [16] - * - Indirect State Pointers Disable [16] - * - * "Requires stall bit ([20] of DW1) set." - * - * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media - * State Clear) says: - * - * "PIPECONTROL command with “Command Streamer Stall Enable” must be - * programmed prior to programming a PIPECONTROL command with "Media - * State Clear" set in GPGPU mode of operation" - * - * This is a subset of the earlier rule, so there's nothing to do. - */ - flags |= PIPE_CONTROL_CS_STALL; - } - - if (flags & PIPE_CONTROL_STORE_DATA_INDEX) { - /* Project: All / Argument: Store Data Index - * - * "Post-Sync Operation ([15:14] of DW1) must be set to something other - * than '0'." - * - * For now, we just assert that the caller does this. We might want to - * automatically add a write to the workaround BO... - */ - assert(non_lri_post_sync_flags != 0); - } - - if (flags & PIPE_CONTROL_SYNC_GFDT) { - /* Project: All / Argument: Sync GFDT - * - * "Post-Sync Operation ([15:14] of DW1) must be set to something other - * than '0' or 0x2520[13] must be set." - * - * For now, we just assert that the caller does this. - */ - assert(non_lri_post_sync_flags != 0); - } - - if (IS_GFX_VERx10_BETWEEN(60, 75) && - (flags & PIPE_CONTROL_TLB_INVALIDATE)) { - /* Project: SNB, IVB, HSW / Argument: TLB inv - * - * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1) - * must be set to something other than '0'." - * - * For now, we just assert that the caller does this. - */ - assert(non_lri_post_sync_flags != 0); - } - - if (GFX_VER >= 7 && (flags & PIPE_CONTROL_TLB_INVALIDATE)) { - /* Project: IVB+ / Argument: TLB inv - * - * "Requires stall bit ([20] of DW1) set." - * - * Also, from the PIPE_CONTROL instruction table: - * - * "Project: SKL+ - * Post Sync Operation or CS stall must be set to ensure a TLB - * invalidation occurs. Otherwise no cycle will occur to the TLB - * cache to invalidate." - * - * This is not a subset of the earlier rule, so there's nothing to do. - */ - flags |= PIPE_CONTROL_CS_STALL; - } - - if (GFX_VER == 9 && devinfo->gt == 4) { - /* TODO: The big Skylake GT4 post sync op workaround */ - } - - /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */ - - if (IS_COMPUTE_PIPELINE(brw)) { - if (GFX_VER >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) { - /* Project: SKL+ / Argument: Tex Invalidate - * "Requires stall bit ([20] of DW) set for all GPGPU Workloads." - */ - flags |= PIPE_CONTROL_CS_STALL; - } - - if (GFX_VER == 8 && (post_sync_flags || - (flags & (PIPE_CONTROL_NOTIFY_ENABLE | - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DATA_CACHE_FLUSH)))) { - /* Project: BDW / Arguments: - * - * - LRI Post Sync Operation [23] - * - Post Sync Op [15:14] - * - Notify En [8] - * - Depth Stall [13] - * - Render Target Cache Flush [12] - * - Depth Cache Flush [0] - * - DC Flush Enable [5] - * - * "Requires stall bit ([20] of DW) set for all GPGPU and Media - * Workloads." - * - * (The docs have separate table rows for each bit, with essentially - * the same workaround text. We've combined them here.) 
- */ - flags |= PIPE_CONTROL_CS_STALL; - - /* Also, from the PIPE_CONTROL instruction table, bit 20: - * - * "Project: BDW - * This bit must be always set when PIPE_CONTROL command is - * programmed by GPGPU and MEDIA workloads, except for the cases - * when only Read Only Cache Invalidation bits are set (State - * Cache Invalidation Enable, Instruction cache Invalidation - * Enable, Texture Cache Invalidation Enable, Constant Cache - * Invalidation Enable). This is to WA FFDOP CG issue, this WA - * need not implemented when FF_DOP_CG is disable via "Fixed - * Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register." - * - * It sounds like we could avoid CS stalls in some cases, but we - * don't currently bother. This list isn't exactly the list above, - * either... - */ - } - } - - /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT: - * - * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with - * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set." - * - * Note that the kernel does CS stalls between batches, so we only need - * to count them within a batch. We currently naively count every 4, and - * don't skip the ones with only read-cache-invalidate bits set. This - * may or may not be a problem... - */ - if (GFX_VERx10 == 70) { - if (flags & PIPE_CONTROL_CS_STALL) { - /* If we're doing a CS stall, reset the counter and carry on. */ - brw->pipe_controls_since_last_cs_stall = 0; - } - - /* If this is the fourth pipe control without a CS stall, do one now. */ - if (++brw->pipe_controls_since_last_cs_stall == 4) { - brw->pipe_controls_since_last_cs_stall = 0; - flags |= PIPE_CONTROL_CS_STALL; - } - } - - /* "Stall" workarounds ---------------------------------------------- - * These have to come after the earlier ones because we may have added - * some additional CS stalls above. - */ - - if (GFX_VER < 9 && (flags & PIPE_CONTROL_CS_STALL)) { - /* Project: PRE-SKL, VLV, CHV - * - * "[All Stepping][All SKUs]: - * - * One of the following must also be set: - * - * - Render Target Cache Flush Enable ([12] of DW1) - * - Depth Cache Flush Enable ([0] of DW1) - * - Stall at Pixel Scoreboard ([1] of DW1) - * - Depth Stall ([13] of DW1) - * - Post-Sync Operation ([13] of DW1) - * - DC Flush Enable ([5] of DW1)" - * - * If we don't already have one of those bits set, we choose to add - * "Stall at Pixel Scoreboard". Some of the other bits require a - * CS stall as a workaround (see above), which would send us into - * an infinite recursion of PIPE_CONTROLs. "Stall at Pixel Scoreboard" - * appears to be safe, so we choose that. 
- */ - const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_WRITE_IMMEDIATE | - PIPE_CONTROL_WRITE_DEPTH_COUNT | - PIPE_CONTROL_WRITE_TIMESTAMP | - PIPE_CONTROL_STALL_AT_SCOREBOARD | - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_DATA_CACHE_FLUSH; - if (!(flags & wa_bits)) - flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; - } - - /* Emit --------------------------------------------------------------- */ - - brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) { - #if GFX_VER >= 9 - pc.FlushLLC = 0; - #endif - #if GFX_VER >= 7 - pc.LRIPostSyncOperation = NoLRIOperation; - pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE; - pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH; - #endif - #if GFX_VER >= 6 - pc.StoreDataIndex = 0; - pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL; - pc.GlobalSnapshotCountReset = - flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET; - pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE; - pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR; - pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD; - pc.RenderTargetCacheFlushEnable = - flags & PIPE_CONTROL_RENDER_TARGET_FLUSH; - pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH; - pc.StateCacheInvalidationEnable = - flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE; - pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE; - pc.ConstantCacheInvalidationEnable = - flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE; - #else - pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH; - #endif - pc.PostSyncOperation = flags_to_post_sync_op(flags); - pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL; - pc.InstructionCacheInvalidateEnable = - flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE; - pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE; - #if GFX_VERx10 >= 45 - pc.IndirectStatePointersDisable = - flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE; - #endif - #if GFX_VER >= 6 - pc.TextureCacheInvalidationEnable = - flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - #elif GFX_VER == 5 || GFX_VERx10 == 45 - pc.TextureCacheFlushEnable = - flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - #endif - pc.Address = ggtt_bo(bo, offset); - if (GFX_VER < 7 && bo) - pc.DestinationAddressType = DAT_GGTT; - pc.ImmediateData = imm; - } -} diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c deleted file mode 100644 index 3db621b..0000000 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ /dev/null @@ -1,6088 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <assert.h>
-
-#include "main/samplerobj.h"
-
-#include "dev/intel_device_info.h"
-#include "common/intel_sample_positions.h"
-#include "genxml/gen_macros.h"
-#include "common/intel_guardband.h"
-
-#include "main/bufferobj.h"
-#include "main/context.h"
-#include "main/enums.h"
-#include "main/macros.h"
-#include "main/state.h"
-
-#include "genX_boilerplate.h"
-
-#include "brw_context.h"
-#include "brw_cs.h"
-#include "brw_draw.h"
-#include "brw_multisample_state.h"
-#include "brw_state.h"
-#include "brw_wm.h"
-#include "brw_util.h"
-
-#include "brw_batch.h"
-#include "brw_buffer_objects.h"
-#include "brw_fbo.h"
-
-#include "main/enums.h"
-#include "main/fbobject.h"
-#include "main/framebuffer.h"
-#include "main/glformats.h"
-#include "main/shaderapi.h"
-#include "main/stencil.h"
-#include "main/transformfeedback.h"
-#include "main/varray.h"
-#include "main/viewport.h"
-#include "util/half_float.h"
-
-#if GFX_VER == 4
-static struct brw_address
-KSP(struct brw_context *brw, uint32_t offset)
-{
-   return ro_bo(brw->cache.bo, offset);
-}
-#else
-static uint32_t
-KSP(UNUSED struct brw_context *brw, uint32_t offset)
-{
-   return offset;
-}
-#endif
-
-#if GFX_VER >= 7
-static void
-emit_lrm(struct brw_context *brw, uint32_t reg, struct brw_address addr)
-{
-   brw_batch_emit(brw, GENX(MI_LOAD_REGISTER_MEM), lrm) {
-      lrm.RegisterAddress = reg;
-      lrm.MemoryAddress = addr;
-   }
-}
-#endif
-
-#if GFX_VER == 7
-static void
-emit_lri(struct brw_context *brw, uint32_t reg, uint32_t imm)
-{
-   brw_batch_emit(brw, GENX(MI_LOAD_REGISTER_IMM), lri) {
-      lri.RegisterOffset = reg;
-      lri.DataDWord = imm;
-   }
-}
-#endif
-
-/**
- * Define the base addresses which some state is referenced from.
- *
- * This allows us to avoid having to emit relocations for the objects,
- * and is actually required for binding table pointers on Gfx6.
- *
- * Surface state base address covers binding table pointers and surface state
- * objects, but not the surfaces that the surface state objects point to.
- */
-static void
-genX(emit_state_base_address)(struct brw_context *brw)
-{
-   if (brw->batch.state_base_address_emitted)
-      return;
-
-   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
-    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
-    * programmed prior to STATE_BASE_ADDRESS.
-    *
-    * However, given that the instruction SBA (general state base
-    * address) on this chipset is always set to 0 across X and GL,
-    * maybe this isn't required for us in particular.
-    */
-
-   UNUSED uint32_t mocs = brw_mocs(&brw->isl_dev, NULL);
-
-   /* Flush before updating STATE_BASE_ADDRESS */
-#if GFX_VER >= 6
-   const unsigned dc_flush =
-      GFX_VER >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
-
-   /* Emit a render target cache flush.
-    *
-    * This isn't documented anywhere in the PRM. However, it seems to be
-    * necessary prior to changing the surface state base address. We've
-    * seen issues in Vulkan where we get GPU hangs when using multi-level
-    * command buffers which clear depth, reset state base address, and then
-    * go render stuff.
-    *
-    * Normally, in GL, we would trust the kernel to do sufficient stalls
-    * and flushes prior to executing our batch.
However, it doesn't seem - * as if the kernel's flushing is always sufficient and we don't want to - * rely on it. - * - * We make this an end-of-pipe sync instead of a normal flush because we - * do not know the current status of the GPU. On Haswell at least, - * having a fast-clear operation in flight at the same time as a normal - * rendering operation can cause hangs. Since the kernel's flushing is - * insufficient, we need to ensure that any rendering operations from - * other processes are definitely complete before we try to do our own - * rendering. It's a bit of a big hammer but it appears to work. - */ - brw_emit_end_of_pipe_sync(brw, - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - dc_flush); -#endif - - brw_batch_emit(brw, GENX(STATE_BASE_ADDRESS), sba) { - /* Set base addresses */ - sba.GeneralStateBaseAddressModifyEnable = true; - -#if GFX_VER >= 6 - sba.DynamicStateBaseAddressModifyEnable = true; - sba.DynamicStateBaseAddress = ro_bo(brw->batch.state.bo, 0); -#endif - - sba.SurfaceStateBaseAddressModifyEnable = true; - sba.SurfaceStateBaseAddress = ro_bo(brw->batch.state.bo, 0); - - sba.IndirectObjectBaseAddressModifyEnable = true; - -#if GFX_VER >= 5 - sba.InstructionBaseAddressModifyEnable = true; - sba.InstructionBaseAddress = ro_bo(brw->cache.bo, 0); -#endif - - /* Set buffer sizes on Gfx8+ or upper bounds on Gfx4-7 */ -#if GFX_VER >= 8 - sba.GeneralStateBufferSize = 0xfffff; - sba.IndirectObjectBufferSize = 0xfffff; - sba.InstructionBufferSize = 0xfffff; - sba.DynamicStateBufferSize = MAX_STATE_SIZE; - - sba.GeneralStateBufferSizeModifyEnable = true; - sba.DynamicStateBufferSizeModifyEnable = true; - sba.IndirectObjectBufferSizeModifyEnable = true; - sba.InstructionBuffersizeModifyEnable = true; -#else - sba.GeneralStateAccessUpperBoundModifyEnable = true; - sba.IndirectObjectAccessUpperBoundModifyEnable = true; - -#if GFX_VER >= 5 - sba.InstructionAccessUpperBoundModifyEnable = true; -#endif - -#if GFX_VER >= 6 - /* Dynamic state upper bound. Although the documentation says that - * programming it to zero will cause it to be ignored, that is a lie. - * If this isn't programmed to a real bound, the sampler border color - * pointer is rejected, causing border color to mysteriously fail. - */ - sba.DynamicStateAccessUpperBound = ro_bo(NULL, 0xfffff000); - sba.DynamicStateAccessUpperBoundModifyEnable = true; -#else - /* Same idea but using General State Base Address on Gfx4-5 */ - sba.GeneralStateAccessUpperBound = ro_bo(NULL, 0xfffff000); -#endif -#endif - -#if GFX_VER >= 6 - /* The hardware appears to pay attention to the MOCS fields even - * if you don't set the "Address Modify Enable" bit for the base. - */ - sba.GeneralStateMOCS = mocs; - sba.StatelessDataPortAccessMOCS = mocs; - sba.DynamicStateMOCS = mocs; - sba.IndirectObjectMOCS = mocs; - sba.InstructionMOCS = mocs; - sba.SurfaceStateMOCS = mocs; -#endif -#if GFX_VER >= 9 - sba.BindlessSurfaceStateMOCS = mocs; -#endif -#if GFX_VER >= 11 - sba.BindlessSamplerStateMOCS = mocs; -#endif - } - - /* Flush after updating STATE_BASE_ADDRESS */ -#if GFX_VER >= 6 - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_INSTRUCTION_INVALIDATE | - PIPE_CONTROL_STATE_CACHE_INVALIDATE | - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); -#endif - - /* According to section 3.6.1 of VOL1 of the 965 PRM, - * STATE_BASE_ADDRESS updates require a reissue of: - * - * 3DSTATE_PIPELINE_POINTERS - * 3DSTATE_BINDING_TABLE_POINTERS - * MEDIA_STATE_POINTERS - * - * and this continues through Ironlake. 
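The flush/program/invalidate bracketing this function performs around STATE_BASE_ADDRESS can be condensed as follows — a sketch only, reusing the brw_* helpers that appear elsewhere in this patch; the wrapper name is hypothetical:

```c
static void
emit_sba_bracketed(struct brw_context *brw)
{
   /* 1. End-of-pipe sync: make sure no in-flight rendering still uses
    *    the old base addresses (the kernel's flushing alone is not
    *    trusted; Gfx7+ adds a DC flush on top of this). */
   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH);

   /* 2. Program the new bases: the STATE_BASE_ADDRESS packet above. */

   /* 3. Invalidate every cache whose entries are keyed off those bases. */
   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                                    PIPE_CONTROL_STATE_CACHE_INVALIDATE |
                                    PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
}
```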
The Sandy Bridge PRM, vol
- * 1 part 1 says that the following packets must be reissued:
- *
- * 3DSTATE_CC_POINTERS
- * 3DSTATE_BINDING_TABLE_POINTERS
- * 3DSTATE_SAMPLER_STATE_POINTERS
- * 3DSTATE_VIEWPORT_STATE_POINTERS
- * MEDIA_STATE_POINTERS
- *
- * Those are always reissued following SBA updates anyway (new
- * batch time), except in the case of the program cache BO
- * changing. Having a separate state flag makes the sequence more
- * obvious.
- */
-   brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS;
-   brw->batch.state_base_address_emitted = true;
-}
-
-/**
- * Polygon stipple packet
- */
-static void
-genX(upload_polygon_stipple)(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-
-   /* _NEW_POLYGON */
-   if (!ctx->Polygon.StippleFlag)
-      return;
-
-   brw_batch_emit(brw, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) {
-      /* Polygon stipple is provided in OpenGL order, i.e. bottom
-       * row first. If we're rendering to a window (i.e. the
-       * default frame buffer object, 0), then we need to invert
-       * it to match our pixel layout. But if we're rendering
-       * to a FBO (i.e. any named frame buffer object), we *don't*
-       * need to invert - we already match the layout.
-       */
-      if (ctx->DrawBuffer->FlipY) {
-         for (unsigned i = 0; i < 32; i++)
-            poly.PatternRow[i] = ctx->PolygonStipple[31 - i]; /* invert */
-      } else {
-         for (unsigned i = 0; i < 32; i++)
-            poly.PatternRow[i] = ctx->PolygonStipple[i];
-      }
-   }
-}
-
-static const struct brw_tracked_state genX(polygon_stipple) = {
-   .dirty = {
-      .mesa = _NEW_POLYGON |
-              _NEW_POLYGONSTIPPLE,
-      .brw = BRW_NEW_CONTEXT,
-   },
-   .emit = genX(upload_polygon_stipple),
-};
-
-/**
- * Polygon stipple offset packet
- */
-static void
-genX(upload_polygon_stipple_offset)(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-
-   /* _NEW_POLYGON */
-   if (!ctx->Polygon.StippleFlag)
-      return;
-
-   brw_batch_emit(brw, GENX(3DSTATE_POLY_STIPPLE_OFFSET), poly) {
-      /* _NEW_BUFFERS
-       *
-       * If we're drawing to a system window we have to invert the Y axis
-       * in order to match the OpenGL pixel coordinate system, and our
-       * offset must be matched to the window position. If we're drawing
-       * to a user-created FBO then our native pixel coordinate system
-       * works just fine, and there's no window system to worry about.
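A worked example of the Y-offset formula the deleted code applies just below, for an assumed 1080-pixel-tall window:

```c
#include <stdio.h>

int main(void)
{
   const unsigned height = 1080;                     /* assumed window height */
   const unsigned yoff = (32 - (height & 31)) & 31;  /* 1080 & 31 = 24 */

   printf("stipple y offset = %u\n", yoff);          /* prints 8 */
   return 0;
}
```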
- */ - if (ctx->DrawBuffer->FlipY) { - poly.PolygonStippleYOffset = - (32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31; - } - } -} - -static const struct brw_tracked_state genX(polygon_stipple_offset) = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_POLYGON, - .brw = BRW_NEW_CONTEXT, - }, - .emit = genX(upload_polygon_stipple_offset), -}; - -/** - * Line stipple packet - */ -static void -genX(upload_line_stipple)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - if (!ctx->Line.StippleFlag) - return; - - brw_batch_emit(brw, GENX(3DSTATE_LINE_STIPPLE), line) { - line.LineStipplePattern = ctx->Line.StipplePattern; - - line.LineStippleInverseRepeatCount = 1.0f / ctx->Line.StippleFactor; - line.LineStippleRepeatCount = ctx->Line.StippleFactor; - } -} - -static const struct brw_tracked_state genX(line_stipple) = { - .dirty = { - .mesa = _NEW_LINE, - .brw = BRW_NEW_CONTEXT, - }, - .emit = genX(upload_line_stipple), -}; - -/* Constant single cliprect for framebuffer object or DRI2 drawing */ -static void -genX(upload_drawing_rect)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - const struct gl_framebuffer *fb = ctx->DrawBuffer; - const unsigned int fb_width = _mesa_geometric_width(fb); - const unsigned int fb_height = _mesa_geometric_height(fb); - - brw_batch_emit(brw, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { - rect.ClippedDrawingRectangleXMax = fb_width - 1; - rect.ClippedDrawingRectangleYMax = fb_height - 1; - } -} - -static const struct brw_tracked_state genX(drawing_rect) = { - .dirty = { - .mesa = _NEW_BUFFERS, - .brw = BRW_NEW_BLORP | - BRW_NEW_CONTEXT, - }, - .emit = genX(upload_drawing_rect), -}; - -static uint32_t * -genX(emit_vertex_buffer_state)(struct brw_context *brw, - uint32_t *dw, - unsigned buffer_nr, - struct brw_bo *bo, - unsigned start_offset, - UNUSED unsigned end_offset, - unsigned stride, - UNUSED unsigned step_rate) -{ - struct GENX(VERTEX_BUFFER_STATE) buf_state = { - .VertexBufferIndex = buffer_nr, - .BufferPitch = stride, - - /* The VF cache designers apparently cut corners, and made the cache - * only consider the bottom 32 bits of memory addresses. If you happen - * to have two vertex buffers which get placed exactly 4 GiB apart and - * use them in back-to-back draw calls, you can get collisions. To work - * around this problem, we restrict vertex buffers to the low 32 bits of - * the address space. - */ - .BufferStartingAddress = ro_32_bo(bo, start_offset), -#if GFX_VER >= 8 - .BufferSize = end_offset - start_offset, -#endif - -#if GFX_VER >= 7 - .AddressModifyEnable = true, -#endif - -#if GFX_VER >= 6 - .MOCS = brw_mocs(&brw->isl_dev, bo), -#endif - -#if GFX_VER < 8 - .BufferAccessType = step_rate ? 
INSTANCEDATA : VERTEXDATA,
-      .InstanceDataStepRate = step_rate,
-#if GFX_VER >= 5
-      .EndAddress = ro_bo(bo, end_offset - 1),
-#endif
-#endif
-   };
-
-   GENX(VERTEX_BUFFER_STATE_pack)(brw, dw, &buf_state);
-   return dw + GENX(VERTEX_BUFFER_STATE_length);
-}
-
-UNUSED static bool
-is_passthru_format(uint32_t format)
-{
-   switch (format) {
-   case ISL_FORMAT_R64_PASSTHRU:
-   case ISL_FORMAT_R64G64_PASSTHRU:
-   case ISL_FORMAT_R64G64B64_PASSTHRU:
-   case ISL_FORMAT_R64G64B64A64_PASSTHRU:
-      return true;
-   default:
-      return false;
-   }
-}
-
-UNUSED static int
-uploads_needed(uint32_t format,
-               bool is_dual_slot)
-{
-   if (!is_passthru_format(format))
-      return 1;
-
-   if (is_dual_slot)
-      return 2;
-
-   switch (format) {
-   case ISL_FORMAT_R64_PASSTHRU:
-   case ISL_FORMAT_R64G64_PASSTHRU:
-      return 1;
-   case ISL_FORMAT_R64G64B64_PASSTHRU:
-   case ISL_FORMAT_R64G64B64A64_PASSTHRU:
-      return 2;
-   default:
-      unreachable("not reached");
-   }
-}
-
-/*
- * Returns the format that we are finally going to use when uploading a vertex
- * element. It will only change if we are using *64*PASSTHRU formats, as for
- * gen < 8 they need to be split into two *32*FLOAT formats.
- *
- * @upload indicates which upload we are in. Valid values are [0,1].
- */
-static uint32_t
-downsize_format_if_needed(uint32_t format,
-                          int upload)
-{
-   assert(upload == 0 || upload == 1);
-
-   if (!is_passthru_format(format))
-      return format;
-
-   /* ISL_FORMAT_R64_PASSTHRU and ISL_FORMAT_R64G64_PASSTHRU with an upload ==
-    * 1 mean that we have been forced to do 2 uploads for a size <= 2. This
-    * happens with gen < 8 and dvec3 or dvec4 vertex shader input
-    * variables. In those cases, we return ISL_FORMAT_R32_FLOAT as a way of
-    * flagging that we want to fill with zeroes this second forced upload.
-    */
-   switch (format) {
-   case ISL_FORMAT_R64_PASSTHRU:
-      return upload == 0 ? ISL_FORMAT_R32G32_FLOAT
-                         : ISL_FORMAT_R32_FLOAT;
-   case ISL_FORMAT_R64G64_PASSTHRU:
-      return upload == 0 ? ISL_FORMAT_R32G32B32A32_FLOAT
-                         : ISL_FORMAT_R32_FLOAT;
-   case ISL_FORMAT_R64G64B64_PASSTHRU:
-      return upload == 0 ? ISL_FORMAT_R32G32B32A32_FLOAT
-                         : ISL_FORMAT_R32G32_FLOAT;
-   case ISL_FORMAT_R64G64B64A64_PASSTHRU:
-      return ISL_FORMAT_R32G32B32A32_FLOAT;
-   default:
-      unreachable("not reached");
-   }
-}
-
-/*
- * Returns the number of components associated with a format that is used in
- * a 64-to-32 format split. See downsize_format_if_needed().
- */
-static int
-upload_format_size(uint32_t upload_format)
-{
-   switch (upload_format) {
-   case ISL_FORMAT_R32_FLOAT:
-
-      /* downsize_format_if_needed() has returned this one in order to flag
-       * that we are performing a second upload which we want to have filled
-       * with zeroes. This happens with gen < 8, a size <= 2, and dvec3 or
-       * dvec4 vertex shader input variables.
-       */
-
-      return 0;
-   case ISL_FORMAT_R32G32_FLOAT:
-      return 2;
-   case ISL_FORMAT_R32G32B32A32_FLOAT:
-      return 4;
-   default:
-      unreachable("not reached");
-   }
-}
-
-static UNUSED uint16_t
-pinned_bo_high_bits(struct brw_bo *bo)
-{
-   return (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32ull : 0;
-}
-
-/* The VF cache designers apparently cut corners, and made the cache key's
- * tuple only consider the bottom 32 bits
- * of the address. If you happen to have two vertex buffers which get placed
- * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
- * collisions. (These collisions can happen within a single batch.)
- *
- * In the soft-pin world, we'd like to assign addresses up front, and never
- * move buffers.
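To make the aliasing concrete, here are two hypothetical buffer addresses exactly 4 GiB apart: their low 32 bits (the VF cache key) collide, while bits [47:32] — what pinned_bo_high_bits() above extracts — differ, which is exactly the transition the helpers below detect:

```c
#include <assert.h>
#include <stdint.h>

static void
vf_cache_alias_example(void)
{
   const uint64_t bo_a = 0x100001000ull;   /* bits [47:32] = 0x0001 */
   const uint64_t bo_b = 0x200001000ull;   /* bits [47:32] = 0x0002 */

   assert((uint32_t)bo_a == (uint32_t)bo_b);   /* same VF cache key */
   assert((bo_a >> 32) != (bo_b >> 32));       /* transition => invalidate */
}
```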
So, we need to do a VF cache invalidate if the buffer for - * a particular VB slot has different [48:32] address bits than the last one. - * - * In the relocation world, we have no idea what the addresses will be, so - * we can't apply this workaround. Instead, we tell the kernel to move it - * to the low 4GB regardless. - * - * This HW issue is gone on Gfx11+. - */ -static void -vf_invalidate_for_vb_48bit_transitions(UNUSED struct brw_context *brw) -{ -#if GFX_VER >= 8 && GFX_VER < 11 - bool need_invalidate = false; - - for (unsigned i = 0; i < brw->vb.nr_buffers; i++) { - uint16_t high_bits = pinned_bo_high_bits(brw->vb.buffers[i].bo); - - if (high_bits != brw->vb.last_bo_high_bits[i]) { - need_invalidate = true; - brw->vb.last_bo_high_bits[i] = high_bits; - } - } - - if (brw->draw.draw_params_bo) { - uint16_t high_bits = pinned_bo_high_bits(brw->draw.draw_params_bo); - - if (brw->vb.last_bo_high_bits[brw->vb.nr_buffers] != high_bits) { - need_invalidate = true; - brw->vb.last_bo_high_bits[brw->vb.nr_buffers] = high_bits; - } - } - - if (brw->draw.derived_draw_params_bo) { - uint16_t high_bits = pinned_bo_high_bits(brw->draw.derived_draw_params_bo); - - if (brw->vb.last_bo_high_bits[brw->vb.nr_buffers + 1] != high_bits) { - need_invalidate = true; - brw->vb.last_bo_high_bits[brw->vb.nr_buffers + 1] = high_bits; - } - } - - if (need_invalidate) { - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_CS_STALL); - } -#endif -} - -static void -vf_invalidate_for_ib_48bit_transition(UNUSED struct brw_context *brw) -{ -#if GFX_VER >= 8 - uint16_t high_bits = pinned_bo_high_bits(brw->ib.bo); - - if (high_bits != brw->ib.last_bo_high_bits) { - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE); - brw->ib.last_bo_high_bits = high_bits; - } -#endif -} - -static void -genX(emit_vertices)(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - uint32_t *dw; - - brw_prepare_vertices(brw); - brw_prepare_shader_draw_parameters(brw); - -#if GFX_VER < 6 - brw_emit_query_begin(brw); -#endif - - const struct brw_vs_prog_data *vs_prog_data = - brw_vs_prog_data(brw->vs.base.prog_data); - -#if GFX_VER >= 8 - struct gl_context *ctx = &brw->ctx; - const bool uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL); - - if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) { - unsigned vue = brw->vb.nr_enabled; - - /* The element for the edge flags must always be last, so we have to - * insert the SGVS before it in that case. 
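The placement rule just described amounts to the following; a sketch with assumed counts (the helper is hypothetical):

```c
#include <stdbool.h>

static unsigned
sgvs_element_index(unsigned nr_enabled, bool uses_edge_flag)
{
   /* The edge flag element must stay last, so VertexID/InstanceID are
    * inserted just before it; e.g. sgvs_element_index(4, true) == 3. */
   return uses_edge_flag ? nr_enabled - 1 : nr_enabled;
}
```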
- */
-      if (uses_edge_flag) {
-         assert(vue > 0);
-         vue--;
-      }
-
-      WARN_ONCE(vue >= 33,
-                "Trying to insert VID/IID past 33rd vertex element, "
-                "need to reorder the vertex attributes.");
-
-      brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs) {
-         if (vs_prog_data->uses_vertexid) {
-            vfs.VertexIDEnable = true;
-            vfs.VertexIDComponentNumber = 2;
-            vfs.VertexIDElementOffset = vue;
-         }
-
-         if (vs_prog_data->uses_instanceid) {
-            vfs.InstanceIDEnable = true;
-            vfs.InstanceIDComponentNumber = 3;
-            vfs.InstanceIDElementOffset = vue;
-         }
-      }
-
-      brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) {
-         vfi.InstancingEnable = true;
-         vfi.VertexElementIndex = vue;
-      }
-   } else {
-      brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs);
-   }
-#endif
-
-   const bool uses_draw_params =
-      vs_prog_data->uses_firstvertex ||
-      vs_prog_data->uses_baseinstance;
-
-   const bool uses_derived_draw_params =
-      vs_prog_data->uses_drawid ||
-      vs_prog_data->uses_is_indexed_draw;
-
-   const bool needs_sgvs_element = (uses_draw_params ||
-                                    vs_prog_data->uses_instanceid ||
-                                    vs_prog_data->uses_vertexid);
-
-   unsigned nr_elements =
-      brw->vb.nr_enabled + needs_sgvs_element + uses_derived_draw_params;
-
-#if GFX_VER < 8
-   /* If any of the formats of vb.enabled needs more than one upload, we need
-    * to add it to nr_elements
-    */
-   for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
-      struct brw_vertex_element *input = brw->vb.enabled[i];
-      uint32_t format = brw_get_vertex_surface_type(brw, input->glformat);
-
-      if (uploads_needed(format, input->is_dual_slot) > 1)
-         nr_elements++;
-   }
-#endif
-
-   /* If the VS doesn't read any inputs (calculating vertex position from
-    * a state variable for some reason, for example), emit a single pad
-    * VERTEX_ELEMENT struct and bail.
-    *
-    * The stale VB state stays in place, but it doesn't do anything unless
-    * a VE loads from it.
-    */
-   if (nr_elements == 0) {
-      dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS),
-                           1 + GENX(VERTEX_ELEMENT_STATE_length));
-      struct GENX(VERTEX_ELEMENT_STATE) elem = {
-         .Valid = true,
-         .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
-         .Component0Control = VFCOMP_STORE_0,
-         .Component1Control = VFCOMP_STORE_0,
-         .Component2Control = VFCOMP_STORE_0,
-         .Component3Control = VFCOMP_STORE_1_FP,
-      };
-      GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem);
-      return;
-   }
-
-   /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
-   const unsigned nr_buffers = brw->vb.nr_buffers +
-      uses_draw_params + uses_derived_draw_params;
-
-   vf_invalidate_for_vb_48bit_transitions(brw);
-
-   if (nr_buffers) {
-      assert(nr_buffers <= (GFX_VER >= 6 ? 33 : 17));
-
-      dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_BUFFERS),
-                           1 + GENX(VERTEX_BUFFER_STATE_length) * nr_buffers);
-
-      for (unsigned i = 0; i < brw->vb.nr_buffers; i++) {
-         const struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
-         /* Prior to Haswell and Bay Trail we have to use 4-component formats
-          * to fake 3-component ones. In particular, we do this for
-          * half-float and 8 and 16-bit integer formats. This means that the
-          * vertex element may poke over the end of the buffer by 2 bytes.
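A worked example of that 2-byte overrun: a 3-component half-float attribute occupies 6 bytes in the buffer, but the hardware fetches it as a faked 4-component (8-byte) value:

```c
static unsigned
vec3_half_float_overrun(void)
{
   const unsigned comp_bytes = 2;               /* 16-bit component */
   const unsigned api_bytes  = 3 * comp_bytes;  /* vec3 as stored: 6 */
   const unsigned hw_bytes   = 4 * comp_bytes;  /* fetched as vec4: 8 */

   return hw_bytes - api_bytes;   /* 2 -- the padding added just below */
}
```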
- */
-         const unsigned padding =
-            (GFX_VERx10 < 75 && devinfo->platform != INTEL_PLATFORM_BYT) * 2;
-         const unsigned end = buffer->offset + buffer->size + padding;
-         dw = genX(emit_vertex_buffer_state)(brw, dw, i, buffer->bo,
-                                             buffer->offset,
-                                             end,
-                                             buffer->stride,
-                                             buffer->step_rate);
-      }
-
-      if (uses_draw_params) {
-         dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers,
-                                             brw->draw.draw_params_bo,
-                                             brw->draw.draw_params_offset,
-                                             brw->draw.draw_params_bo->size,
-                                             0 /* stride */,
-                                             0 /* step rate */);
-      }
-
-      if (uses_derived_draw_params) {
-         dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers + 1,
-                                             brw->draw.derived_draw_params_bo,
-                                             brw->draw.derived_draw_params_offset,
-                                             brw->draw.derived_draw_params_bo->size,
-                                             0 /* stride */,
-                                             0 /* step rate */);
-      }
-   }
-
-   /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
-    * presumably for VertexID/InstanceID.
-    */
-#if GFX_VER >= 6
-   assert(nr_elements <= 34);
-   const struct brw_vertex_element *gfx6_edgeflag_input = NULL;
-#else
-   assert(nr_elements <= 18);
-#endif
-
-   dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS),
-                        1 + GENX(VERTEX_ELEMENT_STATE_length) * nr_elements);
-   unsigned i;
-   for (i = 0; i < brw->vb.nr_enabled; i++) {
-      const struct brw_vertex_element *input = brw->vb.enabled[i];
-      const struct gl_vertex_format *glformat = input->glformat;
-      uint32_t format = brw_get_vertex_surface_type(brw, glformat);
-      uint32_t comp0 = VFCOMP_STORE_SRC;
-      uint32_t comp1 = VFCOMP_STORE_SRC;
-      uint32_t comp2 = VFCOMP_STORE_SRC;
-      uint32_t comp3 = VFCOMP_STORE_SRC;
-      const unsigned num_uploads = GFX_VER < 8 ?
-         uploads_needed(format, input->is_dual_slot) : 1;
-
-#if GFX_VER >= 8
-      /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
-       * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an
-       * element which has edge flag enabled."
-       */
-      assert(!(is_passthru_format(format) && uses_edge_flag));
-#endif
-
-      /* The gfx4 driver expects edgeflag to come in as a float, and passes
-       * that float on to the tests in the clipper. Mesa's current vertex
-       * attribute value for EdgeFlag is stored as a float, which works out.
-       * glEdgeFlagPointer, on the other hand, gives us an unnormalized
-       * integer ubyte. Just rewrite that to convert to a float.
-       *
-       * Gfx6+ passes edgeflag as sideband along with the vertex, instead
-       * of in the VUE. We have to upload it sideband as the last vertex
-       * element according to the B-Spec.
-       */
-#if GFX_VER >= 6
-      if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) {
-         gfx6_edgeflag_input = input;
-         continue;
-      }
-#endif
-
-      for (unsigned c = 0; c < num_uploads; c++) {
-         const uint32_t upload_format = GFX_VER >= 8 ? format :
-            downsize_format_if_needed(format, c);
-         /* If we need more than one upload, the offset stride is 128 bits
-          * (16 bytes), as previous uploads consume the full entry. */
-         const unsigned offset = input->offset + c * 16;
-
-         const int size = (GFX_VER < 8 && is_passthru_format(format)) ?
- upload_format_size(upload_format) : glformat->Size; - - switch (size) { - case 0: comp0 = VFCOMP_STORE_0; FALLTHROUGH; - case 1: comp1 = VFCOMP_STORE_0; FALLTHROUGH; - case 2: comp2 = VFCOMP_STORE_0; FALLTHROUGH; - case 3: - if (GFX_VER >= 8 && glformat->Doubles) { - comp3 = VFCOMP_STORE_0; - } else if (glformat->Integer) { - comp3 = VFCOMP_STORE_1_INT; - } else { - comp3 = VFCOMP_STORE_1_FP; - } - - break; - } - -#if GFX_VER >= 8 - /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE): - * - * "When SourceElementFormat is set to one of the *64*_PASSTHRU - * formats, 64-bit components are stored in the URB without any - * conversion. In this case, vertex elements must be written as 128 - * or 256 bits, with VFCOMP_STORE_0 being used to pad the output as - * required. E.g., if R64_PASSTHRU is used to copy a 64-bit Red - * component into the URB, Component 1 must be specified as - * VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE) in - * order to output a 128-bit vertex element, or Components 1-3 must - * be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex - * element. Likewise, use of R64G64B64_PASSTHRU requires Component 3 - * to be specified as VFCOMP_STORE_0 in order to output a 256-bit - * vertex element." - */ - if (glformat->Doubles && !input->is_dual_slot) { - /* Store vertex elements which correspond to double and dvec2 vertex - * shader inputs as 128-bit vertex elements, instead of 256-bits. - */ - comp2 = VFCOMP_NOSTORE; - comp3 = VFCOMP_NOSTORE; - } -#endif - - struct GENX(VERTEX_ELEMENT_STATE) elem_state = { - .VertexBufferIndex = input->buffer, - .Valid = true, - .SourceElementFormat = upload_format, - .SourceElementOffset = offset, - .Component0Control = comp0, - .Component1Control = comp1, - .Component2Control = comp2, - .Component3Control = comp3, -#if GFX_VER < 5 - .DestinationElementOffset = i * 4, -#endif - }; - - GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); - dw += GENX(VERTEX_ELEMENT_STATE_length); - } - } - - if (needs_sgvs_element) { - struct GENX(VERTEX_ELEMENT_STATE) elem_state = { - .Valid = true, - .Component0Control = VFCOMP_STORE_0, - .Component1Control = VFCOMP_STORE_0, - .Component2Control = VFCOMP_STORE_0, - .Component3Control = VFCOMP_STORE_0, -#if GFX_VER < 5 - .DestinationElementOffset = i * 4, -#endif - }; - -#if GFX_VER >= 8 - if (uses_draw_params) { - elem_state.VertexBufferIndex = brw->vb.nr_buffers; - elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT; - elem_state.Component0Control = VFCOMP_STORE_SRC; - elem_state.Component1Control = VFCOMP_STORE_SRC; - } -#else - elem_state.VertexBufferIndex = brw->vb.nr_buffers; - elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT; - if (uses_draw_params) { - elem_state.Component0Control = VFCOMP_STORE_SRC; - elem_state.Component1Control = VFCOMP_STORE_SRC; - } - - if (vs_prog_data->uses_vertexid) - elem_state.Component2Control = VFCOMP_STORE_VID; - - if (vs_prog_data->uses_instanceid) - elem_state.Component3Control = VFCOMP_STORE_IID; -#endif - - GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); - dw += GENX(VERTEX_ELEMENT_STATE_length); - } - - if (uses_derived_draw_params) { - struct GENX(VERTEX_ELEMENT_STATE) elem_state = { - .Valid = true, - .VertexBufferIndex = brw->vb.nr_buffers + 1, - .SourceElementFormat = ISL_FORMAT_R32G32_UINT, - .Component0Control = VFCOMP_STORE_SRC, - .Component1Control = VFCOMP_STORE_SRC, - .Component2Control = VFCOMP_STORE_0, - .Component3Control = VFCOMP_STORE_0, -#if GFX_VER < 5 - .DestinationElementOffset = i * 4, 
-#endif - }; - - GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); - dw += GENX(VERTEX_ELEMENT_STATE_length); - } - -#if GFX_VER >= 6 - if (gfx6_edgeflag_input) { - const struct gl_vertex_format *glformat = gfx6_edgeflag_input->glformat; - const uint32_t format = brw_get_vertex_surface_type(brw, glformat); - - struct GENX(VERTEX_ELEMENT_STATE) elem_state = { - .Valid = true, - .VertexBufferIndex = gfx6_edgeflag_input->buffer, - .EdgeFlagEnable = true, - .SourceElementFormat = format, - .SourceElementOffset = gfx6_edgeflag_input->offset, - .Component0Control = VFCOMP_STORE_SRC, - .Component1Control = VFCOMP_STORE_0, - .Component2Control = VFCOMP_STORE_0, - .Component3Control = VFCOMP_STORE_0, - }; - - GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); - dw += GENX(VERTEX_ELEMENT_STATE_length); - } -#endif - -#if GFX_VER >= 8 - for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) { - const struct brw_vertex_element *input = brw->vb.enabled[i]; - const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer]; - unsigned element_index; - - /* The edge flag element is reordered to be the last one in the code - * above so we need to compensate for that in the element indices used - * below. - */ - if (input == gfx6_edgeflag_input) - element_index = nr_elements - 1; - else - element_index = j++; - - brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) { - vfi.VertexElementIndex = element_index; - vfi.InstancingEnable = buffer->step_rate != 0; - vfi.InstanceDataStepRate = buffer->step_rate; - } - } - - if (vs_prog_data->uses_drawid) { - const unsigned element = brw->vb.nr_enabled + needs_sgvs_element; - - brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) { - vfi.VertexElementIndex = element; - } - } -#endif -} - -static const struct brw_tracked_state genX(vertices) = { - .dirty = { - .mesa = _NEW_POLYGON, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_VERTICES | - BRW_NEW_VS_PROG_DATA, - }, - .emit = genX(emit_vertices), -}; - -static void -genX(emit_index_buffer)(struct brw_context *brw) -{ - const struct _mesa_index_buffer *index_buffer = brw->ib.ib; - - if (index_buffer == NULL) - return; - - vf_invalidate_for_ib_48bit_transition(brw); - - brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) { -#if GFX_VERx10 < 75 - assert(brw->ib.enable_cut_index == brw->prim_restart.enable_cut_index); - ib.CutIndexEnable = brw->ib.enable_cut_index; -#endif - ib.IndexFormat = brw_get_index_type(1 << index_buffer->index_size_shift); - -#if GFX_VER >= 6 - ib.MOCS = brw_mocs(&brw->isl_dev, brw->ib.bo); -#endif - - /* The VF cache designers apparently cut corners, and made the cache - * only consider the bottom 32 bits of memory addresses. If you happen - * to have two index buffers which get placed exactly 4 GiB apart and - * use them in back-to-back draw calls, you can get collisions. To work - * around this problem, we restrict index buffers to the low 32 bits of - * the address space. 
- */ - ib.BufferStartingAddress = ro_32_bo(brw->ib.bo, 0); -#if GFX_VER >= 8 - ib.BufferSize = brw->ib.size; -#else - ib.BufferEndingAddress = ro_bo(brw->ib.bo, brw->ib.size - 1); -#endif - } -} - -static const struct brw_tracked_state genX(index_buffer) = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_INDEX_BUFFER, - }, - .emit = genX(emit_index_buffer), -}; - -#if GFX_VERx10 >= 75 -static void -genX(upload_cut_index)(struct brw_context *brw) -{ - brw_batch_emit(brw, GENX(3DSTATE_VF), vf) { - if (brw->prim_restart.enable_cut_index && brw->ib.ib) { - vf.IndexedDrawCutIndexEnable = true; - vf.CutIndex = brw->prim_restart.restart_index; - } - } -} - -const struct brw_tracked_state genX(cut_index) = { - .dirty = { - .mesa = _NEW_TRANSFORM, - .brw = BRW_NEW_INDEX_BUFFER, - }, - .emit = genX(upload_cut_index), -}; -#endif - -static void -genX(upload_vf_statistics)(struct brw_context *brw) -{ - brw_batch_emit(brw, GENX(3DSTATE_VF_STATISTICS), vf) { - vf.StatisticsEnable = true; - } -} - -const struct brw_tracked_state genX(vf_statistics) = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BLORP | BRW_NEW_CONTEXT, - }, - .emit = genX(upload_vf_statistics), -}; - -#if GFX_VER >= 6 -/** - * Determine the appropriate attribute override value to store into the - * 3DSTATE_SF structure for a given fragment shader attribute. The attribute - * override value contains two pieces of information: the location of the - * attribute in the VUE (relative to urb_entry_read_offset, see below), and a - * flag indicating whether to "swizzle" the attribute based on the direction - * the triangle is facing. - * - * If an attribute is "swizzled", then the given VUE location is used for - * front-facing triangles, and the VUE location that immediately follows is - * used for back-facing triangles. We use this to implement the mapping from - * gl_FrontColor/gl_BackColor to gl_Color. - * - * urb_entry_read_offset is the offset into the VUE at which the SF unit is - * being instructed to begin reading attribute data. It can be set to a - * nonzero value to prevent the SF unit from wasting time reading elements of - * the VUE that are not needed by the fragment shader. It is measured in - * 256-bit increments. - */ -static void -genX(get_attr_override)(struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr, - const struct brw_vue_map *vue_map, - int urb_entry_read_offset, int fs_attr, - bool two_side_color, uint32_t *max_source_attr) -{ - /* Find the VUE slot for this attribute. */ - int slot = vue_map->varying_to_slot[fs_attr]; - - /* Viewport and Layer are stored in the VUE header. We need to override - * them to zero if earlier stages didn't write them, as GL requires that - * they read back as zero when not explicitly set. 
- */ - if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) { - attr->ComponentOverrideX = true; - attr->ComponentOverrideW = true; - attr->ConstantSource = CONST_0000; - - if (!(vue_map->slots_valid & VARYING_BIT_LAYER)) - attr->ComponentOverrideY = true; - if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT)) - attr->ComponentOverrideZ = true; - - return; - } - - /* If there was only a back color written but not front, use back - * as the color instead of undefined - */ - if (slot == -1 && fs_attr == VARYING_SLOT_COL0) - slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0]; - if (slot == -1 && fs_attr == VARYING_SLOT_COL1) - slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1]; - - if (slot == -1) { - /* This attribute does not exist in the VUE--that means that the vertex - * shader did not write to it. This means that either: - * - * (a) This attribute is a texture coordinate, and it is going to be - * replaced with point coordinates (as a consequence of a call to - * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the - * hardware will ignore whatever attribute override we supply. - * - * (b) This attribute is read by the fragment shader but not written by - * the vertex shader, so its value is undefined. Therefore the - * attribute override we supply doesn't matter. - * - * (c) This attribute is gl_PrimitiveID, and it wasn't written by the - * previous shader stage. - * - * Note that we don't have to worry about the cases where the attribute - * is gl_PointCoord or is undergoing point sprite coordinate - * replacement, because in those cases, this function isn't called. - * - * In case (c), we need to program the attribute overrides so that the - * primitive ID will be stored in this slot. In every other case, the - * attribute override we supply doesn't matter. So just go ahead and - * program primitive ID in every case. - */ - attr->ComponentOverrideW = true; - attr->ComponentOverrideX = true; - attr->ComponentOverrideY = true; - attr->ComponentOverrideZ = true; - attr->ConstantSource = PRIM_ID; - return; - } - - /* Compute the location of the attribute relative to urb_entry_read_offset. - * Each increment of urb_entry_read_offset represents a 256-bit value, so - * it counts for two 128-bit VUE slots. - */ - int source_attr = slot - 2 * urb_entry_read_offset; - assert(source_attr >= 0 && source_attr < 32); - - /* If we are doing two-sided color, and the VUE slot following this one - * represents a back-facing color, then we need to instruct the SF unit to - * do back-facing swizzling. - */ - bool swizzling = two_side_color && - ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 && - vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) || - (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 && - vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1)); - - /* Update max_source_attr. If swizzling, the SF will read this slot + 1. 
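A worked example with assumed values: if the first FS-required varying lives in VUE slot 2, then urb_entry_read_offset is 1 (256-bit units); with gl_Color (COL0) in slot 5 and gl_BackColor (BFC0) in slot 6, two-sided color swizzling applies:

```c
#include <stdbool.h>

static int
attr_override_example(void)
{
   const int urb_entry_read_offset = 1;   /* first_slot 2 / 2 */
   const int slot = 5;                    /* COL0, with BFC0 in slot 6 */
   const bool swizzling = true;           /* two-sided color enabled */

   const int source_attr = slot - 2 * urb_entry_read_offset;   /* 3 */
   return source_attr + swizzling;        /* max_source_attr must cover 4 */
}
```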
*/ - if (*max_source_attr < source_attr + swizzling) - *max_source_attr = source_attr + swizzling; - - attr->SourceAttribute = source_attr; - if (swizzling) - attr->SwizzleSelect = INPUTATTR_FACING; -} - - -static void -genX(calculate_attr_overrides)(const struct brw_context *brw, - struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr_overrides, - uint32_t *point_sprite_enables, - uint32_t *urb_entry_read_length, - uint32_t *urb_entry_read_offset) -{ - const struct gl_context *ctx = &brw->ctx; - - /* _NEW_POINT */ - const struct gl_point_attrib *point = &ctx->Point; - - /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT]; - - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - uint32_t max_source_attr = 0; - - *point_sprite_enables = 0; - - int first_slot = - brw_compute_first_urb_slot_required(fp->info.inputs_read, - &brw->vue_map_geom_out); - - /* Each URB offset packs two varying slots */ - assert(first_slot % 2 == 0); - *urb_entry_read_offset = first_slot / 2; - - /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE, - * description of dw10 Point Sprite Texture Coordinate Enable: - * - * "This field must be programmed to zero when non-point primitives - * are rendered." - * - * The SandyBridge PRM doesn't explicitly say that point sprite enables - * must be programmed to zero when rendering non-point primitives, but - * the IvyBridge PRM does, and if we don't, we get garbage. - * - * This is not required on Haswell, as the hardware ignores this state - * when drawing non-points -- although we do still need to be careful to - * correctly set the attr overrides. - * - * _NEW_POLYGON - * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA - */ - bool drawing_points = brw_is_drawing_points(brw); - - for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) { - uint8_t attr = wm_prog_data->urb_setup_attribs[idx]; - int input_index = wm_prog_data->urb_setup[attr]; - - assert(0 <= input_index); - - /* _NEW_POINT */ - bool point_sprite = false; - if (drawing_points) { - if (point->PointSprite && - (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) && - (point->CoordReplace & (1u << (attr - VARYING_SLOT_TEX0)))) { - point_sprite = true; - } - - if (attr == VARYING_SLOT_PNTC) - point_sprite = true; - - if (point_sprite) - *point_sprite_enables |= (1 << input_index); - } - - /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */ - struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attribute = { 0 }; - - if (!point_sprite) { - genX(get_attr_override)(&attribute, - &brw->vue_map_geom_out, - *urb_entry_read_offset, attr, - _mesa_vertex_program_two_side_enabled(ctx), - &max_source_attr); - } - - /* The hardware can only do the overrides on 16 overrides at a - * time, and the other up to 16 have to be lined up so that the - * input index = the output index. We'll need to do some - * tweaking to make sure that's the case. - */ - if (input_index < 16) - attr_overrides[input_index] = attribute; - else - assert(attribute.SourceAttribute == input_index); - } - - /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for - * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length": - * - * "This field should be set to the minimum length required to read the - * maximum source attribute. 
The maximum source attribute is indicated - * by the maximum value of the enabled Attribute # Source Attribute if - * Attribute Swizzle Enable is set, Number of Output Attributes-1 if - * enable is not set. - * read_length = ceiling((max_source_attr + 1) / 2) - * - * [errata] Corruption/Hang possible if length programmed larger than - * recommended" - * - * Similar text exists for Ivy Bridge. - */ - *urb_entry_read_length = DIV_ROUND_UP(max_source_attr + 1, 2); -} -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 8 -typedef struct GENX(3DSTATE_WM_DEPTH_STENCIL) DEPTH_STENCIL_GENXML; -#elif GFX_VER >= 6 -typedef struct GENX(DEPTH_STENCIL_STATE) DEPTH_STENCIL_GENXML; -#else -typedef struct GENX(COLOR_CALC_STATE) DEPTH_STENCIL_GENXML; -#endif - -static inline void -set_depth_stencil_bits(struct brw_context *brw, DEPTH_STENCIL_GENXML *ds) -{ - struct gl_context *ctx = &brw->ctx; - - /* _NEW_BUFFERS */ - struct brw_renderbuffer *depth_irb = - brw_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); - - /* _NEW_DEPTH */ - struct gl_depthbuffer_attrib *depth = &ctx->Depth; - - /* _NEW_STENCIL */ - struct gl_stencil_attrib *stencil = &ctx->Stencil; - const int b = stencil->_BackFace; - - if (depth->Test && depth_irb) { - ds->DepthTestEnable = true; - ds->DepthBufferWriteEnable = brw_depth_writes_enabled(brw); - ds->DepthTestFunction = brw_translate_compare_func(depth->Func); - } - - if (brw->stencil_enabled) { - ds->StencilTestEnable = true; - ds->StencilWriteMask = stencil->WriteMask[0] & 0xff; - ds->StencilTestMask = stencil->ValueMask[0] & 0xff; - - ds->StencilTestFunction = - brw_translate_compare_func(stencil->Function[0]); - ds->StencilFailOp = - brw_translate_stencil_op(stencil->FailFunc[0]); - ds->StencilPassDepthPassOp = - brw_translate_stencil_op(stencil->ZPassFunc[0]); - ds->StencilPassDepthFailOp = - brw_translate_stencil_op(stencil->ZFailFunc[0]); - - ds->StencilBufferWriteEnable = brw->stencil_write_enabled; - - if (brw->stencil_two_sided) { - ds->DoubleSidedStencilEnable = true; - ds->BackfaceStencilWriteMask = stencil->WriteMask[b] & 0xff; - ds->BackfaceStencilTestMask = stencil->ValueMask[b] & 0xff; - - ds->BackfaceStencilTestFunction = - brw_translate_compare_func(stencil->Function[b]); - ds->BackfaceStencilFailOp = - brw_translate_stencil_op(stencil->FailFunc[b]); - ds->BackfaceStencilPassDepthPassOp = - brw_translate_stencil_op(stencil->ZPassFunc[b]); - ds->BackfaceStencilPassDepthFailOp = - brw_translate_stencil_op(stencil->ZFailFunc[b]); - } - -#if GFX_VER <= 5 || GFX_VER >= 9 - ds->StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0); - ds->BackfaceStencilReferenceValue = _mesa_get_stencil_ref(ctx, b); -#endif - } -} - -#if GFX_VER >= 6 -static void -genX(upload_depth_stencil_state)(struct brw_context *brw) -{ -#if GFX_VER >= 8 - brw_batch_emit(brw, GENX(3DSTATE_WM_DEPTH_STENCIL), wmds) { - set_depth_stencil_bits(brw, &wmds); - } -#else - uint32_t ds_offset; - brw_state_emit(brw, GENX(DEPTH_STENCIL_STATE), 64, &ds_offset, ds) { - set_depth_stencil_bits(brw, &ds); - } - - /* Now upload a pointer to the indirect state */ -#if GFX_VER == 6 - brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) { - ptr.PointertoDEPTH_STENCIL_STATE = ds_offset; - ptr.DEPTH_STENCIL_STATEChange = true; - } -#else - brw_batch_emit(brw, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), ptr) { - ptr.PointertoDEPTH_STENCIL_STATE = ds_offset; - } -#endif -#endif -} - -static const struct brw_tracked_state genX(depth_stencil_state) = { - .dirty = { - 
.mesa = _NEW_BUFFERS | - _NEW_DEPTH | - _NEW_STENCIL, - .brw = BRW_NEW_BLORP | - (GFX_VER >= 8 ? BRW_NEW_CONTEXT - : BRW_NEW_BATCH | - BRW_NEW_STATE_BASE_ADDRESS), - }, - .emit = genX(upload_depth_stencil_state), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER <= 5 - -static void -genX(upload_clip_state)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - ctx->NewDriverState |= BRW_NEW_GFX4_UNIT_STATE; - brw_state_emit(brw, GENX(CLIP_STATE), 32, &brw->clip.state_offset, clip) { - clip.KernelStartPointer = KSP(brw, brw->clip.prog_offset); - clip.GRFRegisterCount = - DIV_ROUND_UP(brw->clip.prog_data->total_grf, 16) - 1; - clip.FloatingPointMode = FLOATING_POINT_MODE_Alternate; - clip.SingleProgramFlow = true; - clip.VertexURBEntryReadLength = brw->clip.prog_data->urb_read_length; - clip.ConstantURBEntryReadLength = brw->clip.prog_data->curb_read_length; - - /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */ - clip.ConstantURBEntryReadOffset = brw->curbe.clip_start * 2; - clip.DispatchGRFStartRegisterForURBData = 1; - clip.VertexURBEntryReadOffset = 0; - - /* BRW_NEW_URB_FENCE */ - clip.NumberofURBEntries = brw->urb.nr_clip_entries; - clip.URBEntryAllocationSize = brw->urb.vsize - 1; - - if (brw->urb.nr_clip_entries >= 10) { - /* Half of the URB entries go to each thread, and it has to be an - * even number. - */ - assert(brw->urb.nr_clip_entries % 2 == 0); - - /* Although up to 16 concurrent Clip threads are allowed on Ironlake, - * only 2 threads can output VUEs at a time. - */ - clip.MaximumNumberofThreads = (GFX_VER == 5 ? 16 : 2) - 1; - } else { - assert(brw->urb.nr_clip_entries >= 5); - clip.MaximumNumberofThreads = 1 - 1; - } - - clip.VertexPositionSpace = VPOS_NDCSPACE; - clip.UserClipFlagsMustClipEnable = true; - clip.GuardbandClipTestEnable = true; - - clip.ClipperViewportStatePointer = - ro_bo(brw->batch.state.bo, brw->clip.vp_offset); - - clip.ScreenSpaceViewportXMin = -1; - clip.ScreenSpaceViewportXMax = 1; - clip.ScreenSpaceViewportYMin = -1; - clip.ScreenSpaceViewportYMax = 1; - - clip.ViewportXYClipTestEnable = true; - clip.ViewportZClipTestEnable = !(ctx->Transform.DepthClampNear && - ctx->Transform.DepthClampFar); - - /* _NEW_TRANSFORM */ - if (GFX_VER == 5 || GFX_VERx10 == 45) { - clip.UserClipDistanceClipTestEnableBitmask = - ctx->Transform.ClipPlanesEnabled; - } else { - /* Up to 6 actual clip flags, plus the 7th for the negative RHW - * workaround. 
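A worked example of the bitmask programmed just below, with hypothetical GL state (user clip planes 0 and 3 enabled):

```c
#include <stdint.h>

static uint32_t
gfx4_clip_bitmask_example(void)
{
   const uint32_t enabled = (1u << 0) | (1u << 3);   /* 0x09 */

   /* bits 0-5: user clip planes; bit 6: the negative-RHW plane */
   return (enabled & 0x3f) | 0x40;                   /* 0x49 */
}
```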
- */ - clip.UserClipDistanceClipTestEnableBitmask = - (ctx->Transform.ClipPlanesEnabled & 0x3f) | 0x40; - } - - if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE) - clip.APIMode = APIMODE_D3D; - else - clip.APIMode = APIMODE_OGL; - - clip.GuardbandClipTestEnable = true; - - clip.ClipMode = brw->clip.prog_data->clip_mode; - -#if GFX_VERx10 == 45 - clip.NegativeWClipTestEnable = true; -#endif - } -} - -const struct brw_tracked_state genX(clip_state) = { - .dirty = { - .mesa = _NEW_TRANSFORM | - _NEW_VIEWPORT, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_CLIP_PROG_DATA | - BRW_NEW_PUSH_CONSTANT_ALLOCATION | - BRW_NEW_PROGRAM_CACHE | - BRW_NEW_URB_FENCE, - }, - .emit = genX(upload_clip_state), -}; - -#else - -static void -genX(upload_clip_state)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - /* _NEW_BUFFERS */ - struct gl_framebuffer *fb = ctx->DrawBuffer; - - /* BRW_NEW_FS_PROG_DATA */ - struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - - brw_batch_emit(brw, GENX(3DSTATE_CLIP), clip) { - clip.StatisticsEnable = !brw->meta_in_progress; - - if (wm_prog_data->barycentric_interp_modes & - BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) - clip.NonPerspectiveBarycentricEnable = true; - -#if GFX_VER >= 7 - clip.EarlyCullEnable = true; -#endif - -#if GFX_VER == 7 - clip.FrontWinding = brw->polygon_front_bit != fb->FlipY; - - if (ctx->Polygon.CullFlag) { - switch (ctx->Polygon.CullFaceMode) { - case GL_FRONT: - clip.CullMode = CULLMODE_FRONT; - break; - case GL_BACK: - clip.CullMode = CULLMODE_BACK; - break; - case GL_FRONT_AND_BACK: - clip.CullMode = CULLMODE_BOTH; - break; - default: - unreachable("Should not get here: invalid CullFlag"); - } - } else { - clip.CullMode = CULLMODE_NONE; - } -#endif - -#if GFX_VER < 8 - clip.UserClipDistanceCullTestEnableBitmask = - brw_vue_prog_data(brw->vs.base.prog_data)->cull_distance_mask; - - clip.ViewportZClipTestEnable = !(ctx->Transform.DepthClampNear && - ctx->Transform.DepthClampFar); -#endif - - /* _NEW_LIGHT */ - if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) { - clip.TriangleStripListProvokingVertexSelect = 0; - clip.TriangleFanProvokingVertexSelect = 1; - clip.LineStripListProvokingVertexSelect = 0; - } else { - clip.TriangleStripListProvokingVertexSelect = 2; - clip.TriangleFanProvokingVertexSelect = 2; - clip.LineStripListProvokingVertexSelect = 1; - } - - /* _NEW_TRANSFORM */ - clip.UserClipDistanceClipTestEnableBitmask = - ctx->Transform.ClipPlanesEnabled; - -#if GFX_VER >= 8 - clip.ForceUserClipDistanceClipTestEnableBitmask = true; -#endif - - if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE) - clip.APIMode = APIMODE_D3D; - else - clip.APIMode = APIMODE_OGL; - - clip.GuardbandClipTestEnable = true; - - /* BRW_NEW_VIEWPORT_COUNT */ - const unsigned viewport_count = brw->clip.viewport_count; - - if (ctx->RasterDiscard) { - clip.ClipMode = CLIPMODE_REJECT_ALL; -#if GFX_VER == 6 - perf_debug("Rasterizer discard is currently implemented via the " - "clipper; having the GS not write primitives would " - "likely be faster.\n"); -#endif - } else { - clip.ClipMode = CLIPMODE_NORMAL; - } - - clip.ClipEnable = true; - - /* _NEW_POLYGON, - * BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA | BRW_NEW_PRIMITIVE - */ - if (!brw_is_drawing_points(brw) && !brw_is_drawing_lines(brw)) - clip.ViewportXYClipTestEnable = true; - - clip.MinimumPointWidth = 0.125; - clip.MaximumPointWidth = 255.875; - clip.MaximumVPIndex = viewport_count - 1; - if (_mesa_geometric_layers(fb) == 0) - 
clip.ForceZeroRTAIndexEnable = true; - } -} - -static const struct brw_tracked_state genX(clip_state) = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_LIGHT | - _NEW_POLYGON | - _NEW_TRANSFORM, - .brw = BRW_NEW_BLORP | - BRW_NEW_CONTEXT | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_GS_PROG_DATA | - BRW_NEW_VS_PROG_DATA | - BRW_NEW_META_IN_PROGRESS | - BRW_NEW_PRIMITIVE | - BRW_NEW_RASTERIZER_DISCARD | - BRW_NEW_TES_PROG_DATA | - BRW_NEW_VIEWPORT_COUNT, - }, - .emit = genX(upload_clip_state), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -static void -genX(upload_sf)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - float point_size; - -#if GFX_VER <= 7 - /* _NEW_BUFFERS */ - bool flip_y = ctx->DrawBuffer->FlipY; - UNUSED const bool multisampled_fbo = - _mesa_geometric_samples(ctx->DrawBuffer) > 1; -#endif - -#if GFX_VER < 6 - const struct brw_sf_prog_data *sf_prog_data = brw->sf.prog_data; - - ctx->NewDriverState |= BRW_NEW_GFX4_UNIT_STATE; - - brw_state_emit(brw, GENX(SF_STATE), 64, &brw->sf.state_offset, sf) { - sf.KernelStartPointer = KSP(brw, brw->sf.prog_offset); - sf.FloatingPointMode = FLOATING_POINT_MODE_Alternate; - sf.GRFRegisterCount = DIV_ROUND_UP(sf_prog_data->total_grf, 16) - 1; - sf.DispatchGRFStartRegisterForURBData = 3; - sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; - sf.VertexURBEntryReadLength = sf_prog_data->urb_read_length; - sf.NumberofURBEntries = brw->urb.nr_sf_entries; - sf.URBEntryAllocationSize = brw->urb.sfsize - 1; - - /* STATE_PREFETCH command description describes this state as being - * something loaded through the GPE (L2 ISC), so it's INSTRUCTION - * domain. - */ - sf.SetupViewportStateOffset = - ro_bo(brw->batch.state.bo, brw->sf.vp_offset); - - sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT; - - /* sf.ConstantURBEntryReadLength = stage_prog_data->curb_read_length; */ - /* sf.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2; */ - - sf.MaximumNumberofThreads = - MIN2(GFX_VER == 5 ? 
48 : 24, brw->urb.nr_sf_entries) - 1; - - sf.SpritePointEnable = ctx->Point.PointSprite; - - sf.DestinationOriginHorizontalBias = 0.5; - sf.DestinationOriginVerticalBias = 0.5; -#else - brw_batch_emit(brw, GENX(3DSTATE_SF), sf) { - sf.StatisticsEnable = true; -#endif - sf.ViewportTransformEnable = true; - -#if GFX_VER == 7 - /* _NEW_BUFFERS */ - sf.DepthBufferSurfaceFormat = brw_depthbuffer_format(brw); -#endif - -#if GFX_VER <= 7 - /* _NEW_POLYGON */ - sf.FrontWinding = brw->polygon_front_bit != flip_y; -#if GFX_VER >= 6 - sf.GlobalDepthOffsetEnableSolid = ctx->Polygon.OffsetFill; - sf.GlobalDepthOffsetEnableWireframe = ctx->Polygon.OffsetLine; - sf.GlobalDepthOffsetEnablePoint = ctx->Polygon.OffsetPoint; - - switch (ctx->Polygon.FrontMode) { - case GL_FILL: - sf.FrontFaceFillMode = FILL_MODE_SOLID; - break; - case GL_LINE: - sf.FrontFaceFillMode = FILL_MODE_WIREFRAME; - break; - case GL_POINT: - sf.FrontFaceFillMode = FILL_MODE_POINT; - break; - default: - unreachable("not reached"); - } - - switch (ctx->Polygon.BackMode) { - case GL_FILL: - sf.BackFaceFillMode = FILL_MODE_SOLID; - break; - case GL_LINE: - sf.BackFaceFillMode = FILL_MODE_WIREFRAME; - break; - case GL_POINT: - sf.BackFaceFillMode = FILL_MODE_POINT; - break; - default: - unreachable("not reached"); - } - - if (multisampled_fbo && ctx->Multisample.Enabled) - sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; - - sf.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2; - sf.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor; - sf.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp; -#endif - - sf.ScissorRectangleEnable = true; - - if (ctx->Polygon.CullFlag) { - switch (ctx->Polygon.CullFaceMode) { - case GL_FRONT: - sf.CullMode = CULLMODE_FRONT; - break; - case GL_BACK: - sf.CullMode = CULLMODE_BACK; - break; - case GL_FRONT_AND_BACK: - sf.CullMode = CULLMODE_BOTH; - break; - default: - unreachable("not reached"); - } - } else { - sf.CullMode = CULLMODE_NONE; - } - -#if GFX_VERx10 == 75 - sf.LineStippleEnable = ctx->Line.StippleFlag; -#endif - -#endif - - /* _NEW_LINE */ -#if GFX_VER == 8 - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->platform == INTEL_PLATFORM_CHV) - sf.CHVLineWidth = brw_get_line_width(brw); - else - sf.LineWidth = brw_get_line_width(brw); -#else - sf.LineWidth = brw_get_line_width(brw); -#endif - - if (ctx->Line.SmoothFlag) { - sf.LineEndCapAntialiasingRegionWidth = _10pixels; -#if GFX_VER <= 7 - sf.AntialiasingEnable = true; -#endif - } - - /* _NEW_POINT - Clamp to ARB_point_parameters user limits */ - point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); - /* Clamp to the hardware limits */ - sf.PointWidth = CLAMP(point_size, 0.125f, 255.875f); - - /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */ - if (use_state_point_size(brw)) - sf.PointWidthSource = State; - -#if GFX_VER >= 8 - /* _NEW_POINT | _NEW_MULTISAMPLE */ - if ((ctx->Point.SmoothFlag || _mesa_is_multisample_enabled(ctx)) && - !ctx->Point.PointSprite) - sf.SmoothPointEnable = true; -#endif - -#if GFX_VER == 10 - /* _NEW_BUFFERS - * Smooth Point Enable bit MUST not be set when NUM_MULTISAMPLES > 1. 
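For reference, the point width programmed above is the result of a two-stage clamp; a worked example using Mesa's CLAMP macro (from main/macros.h), with assumed ARB_point_parameters limits of MinSize = 1.0 and MaxSize = 64.0:

```c
static float
point_width_example(void)
{
   const float api_size = 300.0f;                    /* glPointSize value */
   const float user = CLAMP(api_size, 1.0f, 64.0f);  /* user limits: 64.0 */

   return CLAMP(user, 0.125f, 255.875f);             /* hardware range: 64.0 */
}
```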
- */ - const bool multisampled_fbo = - _mesa_geometric_samples(ctx->DrawBuffer) > 1; - if (multisampled_fbo) - sf.SmoothPointEnable = false; -#endif - -#if GFX_VERx10 >= 45 - sf.AALineDistanceMode = AALINEDISTANCE_TRUE; -#endif - - /* _NEW_LIGHT */ - if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { - sf.TriangleStripListProvokingVertexSelect = 2; - sf.TriangleFanProvokingVertexSelect = 2; - sf.LineStripListProvokingVertexSelect = 1; - } else { - sf.TriangleFanProvokingVertexSelect = 1; - } - -#if GFX_VER == 6 - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - - sf.AttributeSwizzleEnable = true; - sf.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs; - - /* - * Window coordinates in an FBO are inverted, which means point - * sprite origin must be inverted, too. - */ - if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == flip_y) { - sf.PointSpriteTextureCoordinateOrigin = LOWERLEFT; - } else { - sf.PointSpriteTextureCoordinateOrigin = UPPERLEFT; - } - - /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM | - * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA - */ - uint32_t urb_entry_read_length; - uint32_t urb_entry_read_offset; - uint32_t point_sprite_enables; - genX(calculate_attr_overrides)(brw, sf.Attribute, &point_sprite_enables, - &urb_entry_read_length, - &urb_entry_read_offset); - sf.VertexURBEntryReadLength = urb_entry_read_length; - sf.VertexURBEntryReadOffset = urb_entry_read_offset; - sf.PointSpriteTextureCoordinateEnable = point_sprite_enables; - sf.ConstantInterpolationEnable = wm_prog_data->flat_inputs; -#endif - } -} - -static const struct brw_tracked_state genX(sf_state) = { - .dirty = { - .mesa = _NEW_LIGHT | - _NEW_LINE | - _NEW_POINT | - _NEW_PROGRAM | - (GFX_VER >= 6 ? _NEW_MULTISAMPLE : 0) | - (GFX_VER <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0) | - (GFX_VER == 10 ? _NEW_BUFFERS : 0), - .brw = BRW_NEW_BLORP | - BRW_NEW_VUE_MAP_GEOM_OUT | - (GFX_VER <= 5 ? BRW_NEW_BATCH | - BRW_NEW_PROGRAM_CACHE | - BRW_NEW_SF_PROG_DATA | - BRW_NEW_SF_VP | - BRW_NEW_URB_FENCE - : 0) | - (GFX_VER >= 6 ? BRW_NEW_CONTEXT : 0) | - (GFX_VER >= 6 && GFX_VER <= 7 ? - BRW_NEW_GS_PROG_DATA | - BRW_NEW_PRIMITIVE | - BRW_NEW_TES_PROG_DATA - : 0) | - (GFX_VER == 6 ? 
BRW_NEW_FS_PROG_DATA | - BRW_NEW_FRAGMENT_PROGRAM - : 0), - }, - .emit = genX(upload_sf), -}; - -/* ---------------------------------------------------------------------- */ - -static bool -brw_color_buffer_write_enabled(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT]; - unsigned i; - - /* _NEW_BUFFERS */ - for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; - uint64_t outputs_written = fp->info.outputs_written; - - /* _NEW_COLOR */ - if (rb && (outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR) || - outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) && - GET_COLORMASK(ctx->Color.ColorMask, i)) { - return true; - } - } - - return false; -} - -static void -genX(upload_wm)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - - UNUSED bool writes_depth = - wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF; - UNUSED struct brw_stage_state *stage_state = &brw->wm.base; - UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo; - -#if GFX_VER == 6 - /* We can't fold this into gfx6_upload_wm_push_constants(), because - * according to the SNB PRM, vol 2 part 1 section 7.2.2 - * (3DSTATE_CONSTANT_PS [DevSNB]): - * - * "[DevSNB]: This packet must be followed by WM_STATE." - */ - brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_PS), wmcp) { - if (wm_prog_data->base.nr_params != 0) { - wmcp.Buffer0Valid = true; - /* Pointer to the WM constant buffer. Covered by the set of - * state flags from gfx6_upload_wm_push_constants. 
- */ - wmcp.ConstantBody.PointertoConstantBuffer0 = stage_state->push_const_offset; - wmcp.ConstantBody.ConstantBuffer0ReadLength = stage_state->push_const_size - 1; - } - } -#endif - -#if GFX_VER >= 6 - brw_batch_emit(brw, GENX(3DSTATE_WM), wm) { -#else - ctx->NewDriverState |= BRW_NEW_GFX4_UNIT_STATE; - brw_state_emit(brw, GENX(WM_STATE), 64, &stage_state->state_offset, wm) { -#endif - -#if GFX_VER <= 6 - wm._8PixelDispatchEnable = wm_prog_data->dispatch_8; - wm._16PixelDispatchEnable = wm_prog_data->dispatch_16; - wm._32PixelDispatchEnable = wm_prog_data->dispatch_32; -#endif - -#if GFX_VER == 4 - /* On gfx4, we only have one shader kernel */ - if (brw_wm_state_has_ksp(wm, 0)) { - assert(brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0) == 0); - wm.KernelStartPointer0 = KSP(brw, stage_state->prog_offset); - wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0); - wm.DispatchGRFStartRegisterForConstantSetupData0 = - brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0); - } -#elif GFX_VER == 5 - /* On gfx5, we have multiple shader kernels but only one GRF start - * register for all kernels - */ - wm.KernelStartPointer0 = stage_state->prog_offset + - brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0); - wm.KernelStartPointer1 = stage_state->prog_offset + - brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1); - wm.KernelStartPointer2 = stage_state->prog_offset + - brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2); - - wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0); - wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 1); - wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 2); - - wm.DispatchGRFStartRegisterForConstantSetupData0 = - wm_prog_data->base.dispatch_grf_start_reg; - - /* Dispatch GRF Start should be the same for all shaders on gfx5 */ - if (brw_wm_state_has_ksp(wm, 1)) { - assert(wm_prog_data->base.dispatch_grf_start_reg == - brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1)); - } - if (brw_wm_state_has_ksp(wm, 2)) { - assert(wm_prog_data->base.dispatch_grf_start_reg == - brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2)); - } -#elif GFX_VER == 6 - /* On gfx6, we have multiple shader kernels and we no longer specify a - * register count for each one. 
- */
-      wm.KernelStartPointer0 = stage_state->prog_offset +
-         brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);
-      wm.KernelStartPointer1 = stage_state->prog_offset +
-         brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);
-      wm.KernelStartPointer2 = stage_state->prog_offset +
-         brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);
-
-      wm.DispatchGRFStartRegisterForConstantSetupData0 =
-         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0);
-      wm.DispatchGRFStartRegisterForConstantSetupData1 =
-         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1);
-      wm.DispatchGRFStartRegisterForConstantSetupData2 =
-         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2);
-#endif
-
-#if GFX_VER <= 5
-      wm.ConstantURBEntryReadLength = wm_prog_data->base.curb_read_length;
-      /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
-      wm.ConstantURBEntryReadOffset = brw->curbe.wm_start * 2;
-      wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2;
-      wm.SetupURBEntryReadOffset = 0;
-      wm.EarlyDepthTestEnable = true;
-#endif
-
-#if GFX_VER >= 6
-      wm.LineAntialiasingRegionWidth = _10pixels;
-      wm.LineEndCapAntialiasingRegionWidth = _05pixels;
-
-      wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
-      wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
-#else
-      if (stage_state->sampler_count)
-         wm.SamplerStatePointer =
-            ro_bo(brw->batch.state.bo, stage_state->sampler_offset);
-
-      wm.LineAntialiasingRegionWidth = _05pixels;
-      wm.LineEndCapAntialiasingRegionWidth = _10pixels;
-
-      /* _NEW_POLYGON */
-      if (ctx->Polygon.OffsetFill) {
-         wm.GlobalDepthOffsetEnable = true;
-         /* Something weird is going on with legacy_global_depth_bias,
-          * offset_constant, scaling and MRD. This value passes glean
-          * but gives some odd results elsewhere (e.g. the
-          * quad-offset-units test).
-          */
-         wm.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
-
-         /* This is the only value that passes glean:
-          */
-         wm.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
-      }
-
-      wm.DepthCoefficientURBReadOffset = 1;
-#endif
-
-      /* BRW_NEW_STATS_WM */
-      wm.StatisticsEnable = GFX_VER >= 6 || brw->stats_wm;
-
-#if GFX_VER < 7
-      if (wm_prog_data->base.use_alt_mode)
-         wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
-
-      wm.SamplerCount = GFX_VER == 5 ?
-         0 : DIV_ROUND_UP(stage_state->sampler_count, 4);
-
-      wm.BindingTableEntryCount =
-         wm_prog_data->base.binding_table.size_bytes / 4;
-      wm.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
-
-#if GFX_VER == 6
-      wm.DualSourceBlendEnable =
-         wm_prog_data->dual_src_blend && (ctx->Color.BlendEnabled & 1) &&
-         ctx->Color._BlendUsesDualSrc & 0x1;
-      wm.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
-      wm.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
-
-      /* From the SNB PRM, volume 2 part 1, page 281:
-       * "If the PS kernel does not need the Position XY Offsets
-       * to compute a Position XY value, then this field should be
-       * programmed to POSOFFSET_NONE."
-       *
-       * "SW Recommendation: If the PS kernel needs the Position Offsets
-       * to compute a Position XY value, this field should match Position
-       * ZW Interpolation Mode to ensure a consistent position.xyzw
-       * computation."
-       * We only require XY sample offsets, so this recommendation doesn't
-       * look useful at the moment. We might need this in the future.
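As an aside on the scratch-space fields used throughout these packets: the
PerThreadScratchSpace = ffs(per_thread_scratch) - 11 idiom that appears just
below (and again in INIT_THREAD_DISPATCH_FIELDS later in this file) encodes
the per-thread scratch size as a power-of-two multiple of 1 KB. A minimal
standalone sketch of the arithmetic; the function name is illustrative, not
driver API, and it assumes the size is already a power of two of at least
1 KB, as the driver arranges before programming the field:

   #include <assert.h>
   #include <strings.h> /* ffs() */

   /* Illustrative only: ffs(1024) == 11, so 1 KB encodes as 0, 2 KB as 1,
    * 4 KB as 2, and so on; decoding is 1024u << field.
    */
   static unsigned encode_per_thread_scratch(unsigned per_thread_scratch)
   {
      /* Assumed precondition: a power of two, at least 1 KB. */
      assert(per_thread_scratch >= 1024 &&
             (per_thread_scratch & (per_thread_scratch - 1)) == 0);
      return ffs(per_thread_scratch) - 11;
   }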
- */ - if (wm_prog_data->uses_pos_offset) - wm.PositionXYOffsetSelect = POSOFFSET_SAMPLE; - else - wm.PositionXYOffsetSelect = POSOFFSET_NONE; -#endif - - if (wm_prog_data->base.total_scratch) { - wm.ScratchSpaceBasePointer = rw_32_bo(stage_state->scratch_bo, 0); - wm.PerThreadScratchSpace = - ffs(stage_state->per_thread_scratch) - 11; - } - - wm.PixelShaderComputedDepth = writes_depth; -#endif - - /* _NEW_LINE */ - wm.LineStippleEnable = ctx->Line.StippleFlag; - - /* _NEW_POLYGON */ - wm.PolygonStippleEnable = ctx->Polygon.StippleFlag; - -#if GFX_VER < 8 - -#if GFX_VER >= 6 - wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; - - /* _NEW_BUFFERS */ - const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; - - if (multisampled_fbo) { - /* _NEW_MULTISAMPLE */ - if (ctx->Multisample.Enabled) - wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; - else - wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; - - if (wm_prog_data->persample_dispatch) - wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; - else - wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL; - } else { - wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; - wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; - } -#endif - wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; - if (wm_prog_data->uses_kill || - _mesa_is_alpha_test_enabled(ctx) || - _mesa_is_alpha_to_coverage_enabled(ctx) || - (GFX_VER >= 6 && wm_prog_data->uses_omask)) { - wm.PixelShaderKillsPixel = true; - } - - /* _NEW_BUFFERS | _NEW_COLOR */ - if (brw_color_buffer_write_enabled(brw) || writes_depth || - wm.PixelShaderKillsPixel || - (GFX_VER >= 6 && wm_prog_data->has_side_effects)) { - wm.ThreadDispatchEnable = true; - } - -#if GFX_VER >= 7 - wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; - wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask; -#endif - - /* The "UAV access enable" bits are unnecessary on HSW because they only - * seem to have an effect on the HW-assisted coherency mechanism which we - * don't need, and the rasterization-related UAV_ONLY flag and the - * DISPATCH_ENABLE bit can be set independently from it. - * C.f. gfx8_upload_ps_extra(). - * - * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS | - * _NEW_COLOR - */ -#if GFX_VERx10 == 75 - if (!(brw_color_buffer_write_enabled(brw) || writes_depth) && - wm_prog_data->has_side_effects) - wm.PSUAVonly = ON; -#endif -#endif - -#if GFX_VER >= 7 - /* BRW_NEW_FS_PROG_DATA */ - if (wm_prog_data->early_fragment_tests) - wm.EarlyDepthStencilControl = EDSC_PREPS; - else if (wm_prog_data->has_side_effects) - wm.EarlyDepthStencilControl = EDSC_PSEXEC; -#endif - } - -#if GFX_VER <= 5 - if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) { - brw_batch_emit(brw, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp) { - clamp.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp; - } - - brw->wm.offset_clamp = ctx->Polygon.OffsetClamp; - } -#endif -} - -static const struct brw_tracked_state genX(wm_state) = { - .dirty = { - .mesa = _NEW_LINE | - _NEW_POLYGON | - (GFX_VER < 8 ? _NEW_BUFFERS | - _NEW_COLOR : - 0) | - (GFX_VER == 6 ? _NEW_PROGRAM_CONSTANTS : 0) | - (GFX_VER < 6 ? _NEW_POLYGONSTIPPLE : 0) | - (GFX_VER < 8 && GFX_VER >= 6 ? _NEW_MULTISAMPLE : 0), - .brw = BRW_NEW_BLORP | - BRW_NEW_FS_PROG_DATA | - (GFX_VER < 6 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_PROGRAM_CACHE | - BRW_NEW_SAMPLER_STATE_TABLE | - BRW_NEW_STATS_WM - : 0) | - (GFX_VER < 7 ? 
BRW_NEW_BATCH : BRW_NEW_CONTEXT), - }, - .emit = genX(upload_wm), -}; - -/* ---------------------------------------------------------------------- */ - -/* We restrict scratch buffers to the bottom 32 bits of the address space - * by using rw_32_bo(). - * - * General State Base Address is a bit broken. If the address + size as - * seen by STATE_BASE_ADDRESS overflows 48 bits, the GPU appears to treat - * all accesses to the buffer as being out of bounds and returns zero. - */ - -#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \ - pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset); \ - /* Wa_1606682166 */ \ - pkt.SamplerCount = \ - GFX_VER == 11 ? \ - 0 : \ - DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \ - pkt.BindingTableEntryCount = \ - stage_prog_data->binding_table.size_bytes / 4; \ - pkt.FloatingPointMode = stage_prog_data->use_alt_mode; \ - \ - if (stage_prog_data->total_scratch) { \ - pkt.ScratchSpaceBasePointer = rw_32_bo(stage_state->scratch_bo, 0); \ - pkt.PerThreadScratchSpace = \ - ffs(stage_state->per_thread_scratch) - 11; \ - } \ - \ - pkt.DispatchGRFStartRegisterForURBData = \ - stage_prog_data->dispatch_grf_start_reg; \ - pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length; \ - pkt.prefix##URBEntryReadOffset = 0; \ - \ - pkt.StatisticsEnable = true; \ - pkt.Enable = true; - -static void -genX(upload_vs_state)(struct brw_context *brw) -{ - UNUSED struct gl_context *ctx = &brw->ctx; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct brw_stage_state *stage_state = &brw->vs.base; - - /* BRW_NEW_VS_PROG_DATA */ - const struct brw_vue_prog_data *vue_prog_data = - brw_vue_prog_data(brw->vs.base.prog_data); - const struct brw_stage_prog_data *stage_prog_data = &vue_prog_data->base; - - assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 || - vue_prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT); - assert(GFX_VER < 11 || - vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8); - -#if GFX_VER == 6 - /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State, - * 3DSTATE_VS, Dword 5.0 "VS Function Enable": - * - * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS - * command that causes the VS Function Enable to toggle. Pipeline - * flush can be executed by sending a PIPE_CONTROL command with CS - * stall bit set and a post sync operation. - * - * We've already done such a flush at the start of state upload, so we - * don't need to do another one here. - */ - brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), cvs) { - if (stage_state->push_const_size != 0) { - cvs.Buffer0Valid = true; - cvs.ConstantBody.PointertoConstantBuffer0 = stage_state->push_const_offset; - cvs.ConstantBody.ConstantBuffer0ReadLength = stage_state->push_const_size - 1; - } - } -#endif - - if (GFX_VER == 7 && devinfo->platform == INTEL_PLATFORM_IVB) - gfx7_emit_vs_workaround_flush(brw); - -#if GFX_VER >= 6 - brw_batch_emit(brw, GENX(3DSTATE_VS), vs) { -#else - ctx->NewDriverState |= BRW_NEW_GFX4_UNIT_STATE; - brw_state_emit(brw, GENX(VS_STATE), 32, &stage_state->state_offset, vs) { -#endif - INIT_THREAD_DISPATCH_FIELDS(vs, Vertex); - - vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1; - -#if GFX_VER < 6 - vs.GRFRegisterCount = DIV_ROUND_UP(vue_prog_data->total_grf, 16) - 1; - vs.ConstantURBEntryReadLength = stage_prog_data->curb_read_length; - vs.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2; - - vs.NumberofURBEntries = brw->urb.nr_vs_entries >> (GFX_VER == 5 ? 
2 : 0); - vs.URBEntryAllocationSize = brw->urb.vsize - 1; - - vs.MaximumNumberofThreads = - CLAMP(brw->urb.nr_vs_entries / 2, 1, devinfo->max_vs_threads) - 1; - - vs.StatisticsEnable = false; - vs.SamplerStatePointer = - ro_bo(brw->batch.state.bo, stage_state->sampler_offset); -#endif - -#if GFX_VER == 5 - /* Force single program flow on Ironlake. We cannot reliably get - * all applications working without it. See: - * https://bugs.freedesktop.org/show_bug.cgi?id=29172 - * - * The most notable and reliably failing application is the Humus - * demo "CelShading" - */ - vs.SingleProgramFlow = true; - vs.SamplerCount = 0; /* hardware requirement */ -#endif - -#if GFX_VER >= 8 - vs.SIMD8DispatchEnable = - vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8; - - vs.UserClipDistanceCullTestEnableBitmask = - vue_prog_data->cull_distance_mask; -#endif - } - -#if GFX_VER == 6 - /* Based on my reading of the simulator, the VS constants don't get - * pulled into the VS FF unit until an appropriate pipeline flush - * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds - * references to them into a little FIFO. The flushes are common, - * but don't reliably happen between this and a 3DPRIMITIVE, causing - * the primitive to use the wrong constants. Then the FIFO - * containing the constant setup gets added to again on the next - * constants change, and eventually when a flush does happen the - * unit is overwhelmed by constant changes and dies. - * - * To avoid this, send a PIPE_CONTROL down the line that will - * update the unit immediately loading the constants. The flush - * type bits here were those set by the STATE_BASE_ADDRESS whose - * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the - * bug reports that led to this workaround, and may be more than - * what is strictly required to avoid the issue. - */ - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_INSTRUCTION_INVALIDATE | - PIPE_CONTROL_STATE_CACHE_INVALIDATE); -#endif -} - -static const struct brw_tracked_state genX(vs_state) = { - .dirty = { - .mesa = (GFX_VER == 6 ? (_NEW_PROGRAM_CONSTANTS | _NEW_TRANSFORM) : 0), - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_CONTEXT | - BRW_NEW_VS_PROG_DATA | - (GFX_VER == 6 ? BRW_NEW_VERTEX_PROGRAM : 0) | - (GFX_VER <= 5 ? 
BRW_NEW_PUSH_CONSTANT_ALLOCATION | - BRW_NEW_PROGRAM_CACHE | - BRW_NEW_SAMPLER_STATE_TABLE | - BRW_NEW_URB_FENCE - : 0), - }, - .emit = genX(upload_vs_state), -}; - -/* ---------------------------------------------------------------------- */ - -static void -genX(upload_cc_viewport)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - /* BRW_NEW_VIEWPORT_COUNT */ - const unsigned viewport_count = brw->clip.viewport_count; - - struct GENX(CC_VIEWPORT) ccv; - uint32_t cc_vp_offset; - uint32_t *cc_map = - brw_state_batch(brw, 4 * GENX(CC_VIEWPORT_length) * viewport_count, - 32, &cc_vp_offset); - - for (unsigned i = 0; i < viewport_count; i++) { - /* _NEW_VIEWPORT | _NEW_TRANSFORM */ - const struct gl_viewport_attrib *vp = &ctx->ViewportArray[i]; - if (ctx->Transform.DepthClampNear && ctx->Transform.DepthClampFar) { - ccv.MinimumDepth = MIN2(vp->Near, vp->Far); - ccv.MaximumDepth = MAX2(vp->Near, vp->Far); - } else if (ctx->Transform.DepthClampNear) { - ccv.MinimumDepth = MIN2(vp->Near, vp->Far); - ccv.MaximumDepth = 0.0; - } else if (ctx->Transform.DepthClampFar) { - ccv.MinimumDepth = 0.0; - ccv.MaximumDepth = MAX2(vp->Near, vp->Far); - } else { - ccv.MinimumDepth = 0.0; - ccv.MaximumDepth = 1.0; - } - GENX(CC_VIEWPORT_pack)(NULL, cc_map, &ccv); - cc_map += GENX(CC_VIEWPORT_length); - } - -#if GFX_VER >= 7 - brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) { - ptr.CCViewportPointer = cc_vp_offset; - } -#elif GFX_VER == 6 - brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) { - vp.CCViewportStateChange = 1; - vp.PointertoCC_VIEWPORT = cc_vp_offset; - } -#else - brw->cc.vp_offset = cc_vp_offset; - ctx->NewDriverState |= BRW_NEW_CC_VP; -#endif -} - -const struct brw_tracked_state genX(cc_vp) = { - .dirty = { - .mesa = _NEW_TRANSFORM | - _NEW_VIEWPORT, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VIEWPORT_COUNT, - }, - .emit = genX(upload_cc_viewport) -}; - -/* ---------------------------------------------------------------------- */ - -static void -set_scissor_bits(const struct gl_context *ctx, int i, - bool flip_y, unsigned fb_width, unsigned fb_height, - struct GENX(SCISSOR_RECT) *sc) -{ - int bbox[4]; - - bbox[0] = MAX2(ctx->ViewportArray[i].X, 0); - bbox[1] = MIN2(bbox[0] + ctx->ViewportArray[i].Width, fb_width); - bbox[2] = CLAMP(ctx->ViewportArray[i].Y, 0, fb_height); - bbox[3] = MIN2(bbox[2] + ctx->ViewportArray[i].Height, fb_height); - _mesa_intersect_scissor_bounding_box(ctx, i, bbox); - - if (bbox[0] == bbox[1] || bbox[2] == bbox[3]) { - /* If the scissor was out of bounds and got clamped to 0 width/height - * at the bounds, the subtraction of 1 from maximums could produce a - * negative number and thus not clip anything. Instead, just provide - * a min > max scissor inside the bounds, which produces the expected - * no rendering. 
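The min > max trick described above relies only on the hardware scissor test
being inclusive at both ends: no integer pixel coordinate satisfies x >= 1
and x <= 0 at once, so the degenerate rectangle rejects everything. A minimal
sketch of that predicate, assuming inclusive-bounds semantics (the helper
name is made up for illustration):

   #include <stdbool.h>

   /* Inclusive scissor test as the hardware applies it: a pixel passes
    * only if it lies inside [xmin, xmax] x [ymin, ymax].  With xmin = 1,
    * xmax = 0 this is false for every (x, y), which is the "no rendering"
    * result the comment above wants.
    */
   static bool scissor_accepts(int x, int y,
                               int xmin, int xmax, int ymin, int ymax)
   {
      return x >= xmin && x <= xmax && y >= ymin && y <= ymax;
   }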
- */
-      sc->ScissorRectangleXMin = 1;
-      sc->ScissorRectangleXMax = 0;
-      sc->ScissorRectangleYMin = 1;
-      sc->ScissorRectangleYMax = 0;
-   } else if (!flip_y) {
-      /* texture memory: Y=0=bottom */
-      sc->ScissorRectangleXMin = bbox[0];
-      sc->ScissorRectangleXMax = bbox[1] - 1;
-      sc->ScissorRectangleYMin = bbox[2];
-      sc->ScissorRectangleYMax = bbox[3] - 1;
-   } else {
-      /* memory: Y=0=top */
-      sc->ScissorRectangleXMin = bbox[0];
-      sc->ScissorRectangleXMax = bbox[1] - 1;
-      sc->ScissorRectangleYMin = fb_height - bbox[3];
-      sc->ScissorRectangleYMax = fb_height - bbox[2] - 1;
-   }
-}
-
-#if GFX_VER >= 6
-static void
-genX(upload_scissor_state)(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-   const bool flip_y = ctx->DrawBuffer->FlipY;
-   struct GENX(SCISSOR_RECT) scissor;
-   uint32_t scissor_state_offset;
-   const unsigned int fb_width = _mesa_geometric_width(ctx->DrawBuffer);
-   const unsigned int fb_height = _mesa_geometric_height(ctx->DrawBuffer);
-   uint32_t *scissor_map;
-
-   /* BRW_NEW_VIEWPORT_COUNT */
-   const unsigned viewport_count = brw->clip.viewport_count;
-   /* Wa_1409725701:
-    * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
-    * stored as an array of up to 16 elements. The location of first
-    * element of the array, as specified by Pointer to SCISSOR_RECT, should
-    * be aligned to a 64-byte boundary."
-    */
-   const unsigned alignment = 64;
-   scissor_map = brw_state_batch(
-      brw, GENX(SCISSOR_RECT_length) * sizeof(uint32_t) * viewport_count,
-      alignment, &scissor_state_offset);
-
-   /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */
-
-   /* The scissor only needs to handle the intersection of drawable and
-    * scissor rect. Clipping to the boundaries of static shared buffers
-    * for front/back/depth is covered by looping over cliprects in brw_draw.c.
-    *
-    * Note that the hardware's coordinates are inclusive, while Mesa's min is
-    * inclusive but max is exclusive.
- */ - for (unsigned i = 0; i < viewport_count; i++) { - set_scissor_bits(ctx, i, flip_y, fb_width, fb_height, &scissor); - GENX(SCISSOR_RECT_pack)( - NULL, scissor_map + i * GENX(SCISSOR_RECT_length), &scissor); - } - - brw_batch_emit(brw, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) { - ptr.ScissorRectPointer = scissor_state_offset; - } -} - -static const struct brw_tracked_state genX(scissor_state) = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_SCISSOR | - _NEW_VIEWPORT, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VIEWPORT_COUNT, - }, - .emit = genX(upload_scissor_state), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -static void -genX(upload_sf_clip_viewport)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - float y_scale, y_bias; - - /* BRW_NEW_VIEWPORT_COUNT */ - const unsigned viewport_count = brw->clip.viewport_count; - - /* _NEW_BUFFERS */ - const bool flip_y = ctx->DrawBuffer->FlipY; - const uint32_t fb_width = (float)_mesa_geometric_width(ctx->DrawBuffer); - const uint32_t fb_height = (float)_mesa_geometric_height(ctx->DrawBuffer); - -#if GFX_VER >= 7 -#define clv sfv - struct GENX(SF_CLIP_VIEWPORT) sfv; - uint32_t sf_clip_vp_offset; - uint32_t *sf_clip_map = - brw_state_batch(brw, GENX(SF_CLIP_VIEWPORT_length) * 4 * viewport_count, - 64, &sf_clip_vp_offset); -#else - struct GENX(SF_VIEWPORT) sfv; - struct GENX(CLIP_VIEWPORT) clv; - uint32_t sf_vp_offset, clip_vp_offset; - uint32_t *sf_map = - brw_state_batch(brw, GENX(SF_VIEWPORT_length) * 4 * viewport_count, - 32, &sf_vp_offset); - uint32_t *clip_map = - brw_state_batch(brw, GENX(CLIP_VIEWPORT_length) * 4 * viewport_count, - 32, &clip_vp_offset); -#endif - - /* _NEW_BUFFERS */ - if (flip_y) { - y_scale = -1.0; - y_bias = (float)fb_height; - } else { - y_scale = 1.0; - y_bias = 0; - } - - for (unsigned i = 0; i < brw->clip.viewport_count; i++) { - /* _NEW_VIEWPORT: Guardband Clipping */ - float scale[3], translate[3], gb_xmin, gb_xmax, gb_ymin, gb_ymax; - _mesa_get_viewport_xform(ctx, i, scale, translate); - - sfv.ViewportMatrixElementm00 = scale[0]; - sfv.ViewportMatrixElementm11 = scale[1] * y_scale, - sfv.ViewportMatrixElementm22 = scale[2], - sfv.ViewportMatrixElementm30 = translate[0], - sfv.ViewportMatrixElementm31 = translate[1] * y_scale + y_bias, - sfv.ViewportMatrixElementm32 = translate[2], - intel_calculate_guardband_size(fb_width, fb_height, - sfv.ViewportMatrixElementm00, - sfv.ViewportMatrixElementm11, - sfv.ViewportMatrixElementm30, - sfv.ViewportMatrixElementm31, - &gb_xmin, &gb_xmax, &gb_ymin, &gb_ymax); - - - clv.XMinClipGuardband = gb_xmin; - clv.XMaxClipGuardband = gb_xmax; - clv.YMinClipGuardband = gb_ymin; - clv.YMaxClipGuardband = gb_ymax; - -#if GFX_VER < 6 - set_scissor_bits(ctx, i, flip_y, fb_width, fb_height, - &sfv.ScissorRectangle); -#elif GFX_VER >= 8 - /* _NEW_VIEWPORT | _NEW_BUFFERS: Screen Space Viewport - * The hardware will take the intersection of the drawing rectangle, - * scissor rectangle, and the viewport extents. However, emitting - * 3DSTATE_DRAWING_RECTANGLE is expensive since it requires a full - * pipeline stall so we're better off just being a little more clever - * with our viewport so we can emit it once at context creation time. 
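For the y_scale/y_bias pair chosen earlier in this hunk, a worked number may
help: with flip_y and fb_height = 1080, the viewport matrix element m31
becomes translate[1] * -1 + 1080, so the bottom edge of GL window space
(y = 0) maps to 1080 and the top edge (y = 1080) maps to 0. A tiny sketch of
the flip, with an illustrative (non-driver) function name:

   /* Window-system Y flip applied by the viewport transform: GL is
    * bottom-up, winsys memory is top-down, so y' = fb_height - y.
    */
   static float flip_window_y(float gl_y, float fb_height)
   {
      const float y_scale = -1.0f;    /* mirrors y_scale above */
      const float y_bias = fb_height; /* mirrors y_bias above */
      return gl_y * y_scale + y_bias;
   }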
- */ - const float viewport_Xmin = MAX2(ctx->ViewportArray[i].X, 0); - const float viewport_Ymin = MAX2(ctx->ViewportArray[i].Y, 0); - const float viewport_Xmax = - MIN2(ctx->ViewportArray[i].X + ctx->ViewportArray[i].Width, fb_width); - const float viewport_Ymax = - MIN2(ctx->ViewportArray[i].Y + ctx->ViewportArray[i].Height, fb_height); - - if (flip_y) { - sfv.XMinViewPort = viewport_Xmin; - sfv.XMaxViewPort = viewport_Xmax - 1; - sfv.YMinViewPort = fb_height - viewport_Ymax; - sfv.YMaxViewPort = fb_height - viewport_Ymin - 1; - } else { - sfv.XMinViewPort = viewport_Xmin; - sfv.XMaxViewPort = viewport_Xmax - 1; - sfv.YMinViewPort = viewport_Ymin; - sfv.YMaxViewPort = viewport_Ymax - 1; - } -#endif - -#if GFX_VER >= 7 - GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_map, &sfv); - sf_clip_map += GENX(SF_CLIP_VIEWPORT_length); -#else - GENX(SF_VIEWPORT_pack)(NULL, sf_map, &sfv); - GENX(CLIP_VIEWPORT_pack)(NULL, clip_map, &clv); - sf_map += GENX(SF_VIEWPORT_length); - clip_map += GENX(CLIP_VIEWPORT_length); -#endif - } - -#if GFX_VER >= 7 - brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) { - ptr.SFClipViewportPointer = sf_clip_vp_offset; - } -#elif GFX_VER == 6 - brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vp) { - vp.SFViewportStateChange = 1; - vp.CLIPViewportStateChange = 1; - vp.PointertoCLIP_VIEWPORT = clip_vp_offset; - vp.PointertoSF_VIEWPORT = sf_vp_offset; - } -#else - brw->sf.vp_offset = sf_vp_offset; - brw->clip.vp_offset = clip_vp_offset; - brw->ctx.NewDriverState |= BRW_NEW_SF_VP | BRW_NEW_CLIP_VP; -#endif -} - -static const struct brw_tracked_state genX(sf_clip_viewport) = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_VIEWPORT | - (GFX_VER <= 5 ? _NEW_SCISSOR : 0), - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VIEWPORT_COUNT, - }, - .emit = genX(upload_sf_clip_viewport), -}; - -/* ---------------------------------------------------------------------- */ - -static void -genX(upload_gs_state)(struct brw_context *brw) -{ - UNUSED struct gl_context *ctx = &brw->ctx; - UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct brw_stage_state *stage_state = &brw->gs.base; - const struct gl_program *gs_prog = brw->programs[MESA_SHADER_GEOMETRY]; - /* BRW_NEW_GEOMETRY_PROGRAM */ - bool active = GFX_VER >= 6 && gs_prog; - - /* BRW_NEW_GS_PROG_DATA */ - struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data; - UNUSED const struct brw_vue_prog_data *vue_prog_data = - brw_vue_prog_data(stage_prog_data); -#if GFX_VER >= 7 - const struct brw_gs_prog_data *gs_prog_data = - brw_gs_prog_data(stage_prog_data); -#endif - -#if GFX_VER == 6 - brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_GS), cgs) { - if (active && stage_state->push_const_size != 0) { - cgs.Buffer0Valid = true; - cgs.ConstantBody.PointertoConstantBuffer0 = stage_state->push_const_offset; - cgs.ConstantBody.ConstantBuffer0ReadLength = stage_state->push_const_size - 1; - } - } -#endif - -#if GFX_VERx10 == 70 - /** - * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > - * Geometry > Geometry Shader > State: - * - * "Note: Because of corruption in IVB:GT2, software needs to flush the - * whole fixed function pipeline when the GS enable changes value in - * the 3DSTATE_GS." - * - * The hardware architects have clarified that in this context "flush the - * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS - * Stall" bit set. 
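The flush demanded by the IVB:GT2 note above only has to happen when the GS
enable actually changes value, which is why the driver tracks
brw->gs.enabled and compares it against the new state. A standalone sketch
of that guard (hypothetical helper; the real flush is
gfx7_emit_cs_stall_flush(), a PIPE_CONTROL with the CS Stall bit plus the
post-sync write that IVB generally requires alongside a lone CS stall):

   #include <stdbool.h>

   /* The whole-pipeline flush is needed only on enable-bit transitions,
    * not on every 3DSTATE_GS emit.
    */
   static bool gs_toggle_needs_flush(bool was_enabled, bool now_enabled)
   {
      return was_enabled != now_enabled;
   }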
- */
-   if (devinfo->gt == 2 && brw->gs.enabled != active)
-      gfx7_emit_cs_stall_flush(brw);
-#endif
-
-#if GFX_VER >= 6
-   brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
-#else
-   ctx->NewDriverState |= BRW_NEW_GFX4_UNIT_STATE;
-   brw_state_emit(brw, GENX(GS_STATE), 32, &brw->ff_gs.state_offset, gs) {
-#endif
-
-#if GFX_VER >= 6
-      if (active) {
-         INIT_THREAD_DISPATCH_FIELDS(gs, Vertex);
-
-#if GFX_VER >= 7
-         gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
-         gs.OutputTopology = gs_prog_data->output_topology;
-         gs.ControlDataHeaderSize =
-            gs_prog_data->control_data_header_size_hwords;
-
-         gs.InstanceControl = gs_prog_data->invocations - 1;
-         gs.DispatchMode = vue_prog_data->dispatch_mode;
-
-         gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
-
-         gs.ControlDataFormat = gs_prog_data->control_data_format;
-#endif
-
-         /* Note: the meaning of the GFX7_GS_REORDER_TRAILING bit changes between
-          * Ivy Bridge and Haswell.
-          *
-          * On Ivy Bridge, setting this bit causes the vertices of a triangle
-          * strip to be delivered to the geometry shader in an order that does
-          * not strictly follow the OpenGL spec, but preserves triangle
-          * orientation. For example, if the vertices are (1, 2, 3, 4, 5), then
-          * the geometry shader sees triangles:
-          *
-          * (1, 2, 3), (2, 4, 3), (3, 4, 5)
-          *
-          * (Clearing the bit is even worse, because it fails to preserve
-          * orientation).
-          *
-          * Triangle strips with adjacency are always ordered in a way that
-          * preserves triangle orientation but does not strictly follow the
-          * OpenGL spec, regardless of the setting of this bit.
-          *
-          * On Haswell, both triangle strips and triangle strips with adjacency
-          * are always ordered in a way that preserves triangle orientation.
-          * Setting this bit causes the ordering to strictly follow the OpenGL
-          * spec.
-          *
-          * So in either case we want to set the bit. Unfortunately on Ivy
-          * Bridge this will get the order close to correct but not perfect.
-          */
-         gs.ReorderMode = TRAILING;
-         gs.MaximumNumberofThreads =
-            GFX_VER == 8 ? (devinfo->max_gs_threads / 2 - 1)
-                         : (devinfo->max_gs_threads - 1);
-
-#if GFX_VER < 7
-         gs.SOStatisticsEnable = true;
-         if (gs_prog->info.has_transform_feedback_varyings)
-            gs.SVBIPayloadEnable = _mesa_is_xfb_active_and_unpaused(ctx);
-
-         /* GFX6_GS_SPF_MODE and GFX6_GS_VECTOR_MASK_ENABLE are enabled, as
-          * was previously done for gfx6.
-          *
-          * TODO: test with both disabled to see if the HW is behaving
-          * as expected, like in gfx7.
-          */
-         gs.SingleProgramFlow = true;
-         gs.VectorMaskEnable = true;
-#endif
-
-#if GFX_VER >= 8
-         gs.ExpectedVertexCount = gs_prog_data->vertices_in;
-
-         if (gs_prog_data->static_vertex_count != -1) {
-            gs.StaticOutput = true;
-            gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count;
-         }
-         gs.IncludeVertexHandles = vue_prog_data->include_vue_handles;
-
-         gs.UserClipDistanceCullTestEnableBitmask =
-            vue_prog_data->cull_distance_mask;
-
-         const int urb_entry_write_offset = 1;
-         const uint32_t urb_entry_output_length =
-            DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) -
-            urb_entry_write_offset;
-
-         gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;
-         gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
-#endif
-      }
-#endif
-
-#if GFX_VER <= 6
-      if (!active && brw->ff_gs.prog_active) {
-         /* In gfx6, transform feedback for the VS stage is done with an
-          * ad-hoc GS program. This function provides the needed 3DSTATE_GS
-          * for this.
- */ - gs.KernelStartPointer = KSP(brw, brw->ff_gs.prog_offset); - gs.SingleProgramFlow = true; - gs.DispatchGRFStartRegisterForURBData = GFX_VER == 6 ? 2 : 1; - gs.VertexURBEntryReadLength = brw->ff_gs.prog_data->urb_read_length; - -#if GFX_VER <= 5 - gs.GRFRegisterCount = - DIV_ROUND_UP(brw->ff_gs.prog_data->total_grf, 16) - 1; - /* BRW_NEW_URB_FENCE */ - gs.NumberofURBEntries = brw->urb.nr_gs_entries; - gs.URBEntryAllocationSize = brw->urb.vsize - 1; - gs.MaximumNumberofThreads = brw->urb.nr_gs_entries >= 8 ? 1 : 0; - gs.FloatingPointMode = FLOATING_POINT_MODE_Alternate; -#else - gs.Enable = true; - gs.VectorMaskEnable = true; - gs.SVBIPayloadEnable = true; - gs.SVBIPostIncrementEnable = true; - gs.SVBIPostIncrementValue = - brw->ff_gs.prog_data->svbi_postincrement_value; - gs.SOStatisticsEnable = true; - gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1; -#endif - } -#endif - if (!active && !brw->ff_gs.prog_active) { -#if GFX_VER < 8 - gs.DispatchGRFStartRegisterForURBData = 1; -#if GFX_VER >= 7 - gs.IncludeVertexHandles = true; -#endif -#endif - } - -#if GFX_VER >= 6 - gs.StatisticsEnable = true; -#endif -#if GFX_VER == 5 || GFX_VER == 6 - gs.RenderingEnabled = true; -#endif -#if GFX_VER <= 5 - gs.MaximumVPIndex = brw->clip.viewport_count - 1; -#endif - } - -#if GFX_VER == 6 - brw->gs.enabled = active; -#endif -} - -static const struct brw_tracked_state genX(gs_state) = { - .dirty = { - .mesa = (GFX_VER == 6 ? _NEW_PROGRAM_CONSTANTS : 0), - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - (GFX_VER <= 5 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION | - BRW_NEW_PROGRAM_CACHE | - BRW_NEW_URB_FENCE | - BRW_NEW_VIEWPORT_COUNT - : 0) | - (GFX_VER >= 6 ? BRW_NEW_CONTEXT | - BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_GS_PROG_DATA - : 0) | - (GFX_VER < 7 ? BRW_NEW_FF_GS_PROG_DATA : 0), - }, - .emit = genX(upload_gs_state), -}; - -/* ---------------------------------------------------------------------- */ - -UNUSED static GLenum -fix_dual_blend_alpha_to_one(GLenum function) -{ - switch (function) { - case GL_SRC1_ALPHA: - return GL_ONE; - - case GL_ONE_MINUS_SRC1_ALPHA: - return GL_ZERO; - } - - return function; -} - -#define blend_factor(x) brw_translate_blend_factor(x) -#define blend_eqn(x) brw_translate_blend_equation(x) - -/** - * Modify blend function to force destination alpha to 1.0 - * - * If \c function specifies a blend function that uses destination alpha, - * replace it with a function that hard-wires destination alpha to 1.0. This - * is used when rendering to xRGB targets. - */ -static GLenum -brw_fix_xRGB_alpha(GLenum function) -{ - switch (function) { - case GL_DST_ALPHA: - return GL_ONE; - - case GL_ONE_MINUS_DST_ALPHA: - case GL_SRC_ALPHA_SATURATE: - return GL_ZERO; - } - - return function; -} - -#if GFX_VER >= 6 -typedef struct GENX(BLEND_STATE_ENTRY) BLEND_ENTRY_GENXML; -#else -typedef struct GENX(COLOR_CALC_STATE) BLEND_ENTRY_GENXML; -#endif - -UNUSED static bool -set_blend_entry_bits(struct brw_context *brw, BLEND_ENTRY_GENXML *entry, int i, - bool alpha_to_one) -{ - struct gl_context *ctx = &brw->ctx; - - /* _NEW_BUFFERS */ - const struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; - - bool independent_alpha_blend = false; - - /* Used for implementing the following bit of GL_EXT_texture_integer: - * "Per-fragment operations that require floating-point color - * components, including multisample alpha operations, alpha test, - * blending, and dithering, have no effect when the corresponding - * colors are written to an integer color buffer." 
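To make brw_fix_xRGB_alpha() above concrete: on an xRGB target the missing
destination alpha reads back as 1.0, so any factor sampling it collapses to
a constant. A standalone restatement of the same mapping (illustrative
name, standard GL enums):

   #include <GL/gl.h>

   /* dst.a is implicitly 1.0, so DST_ALPHA becomes ONE, while
    * ONE_MINUS_DST_ALPHA and SRC_ALPHA_SATURATE (min(src.a, 1 - dst.a))
    * both become ZERO.  All other factors pass through untouched.
    */
   static GLenum fix_xrgb_factor(GLenum factor)
   {
      switch (factor) {
      case GL_DST_ALPHA:           return GL_ONE;
      case GL_ONE_MINUS_DST_ALPHA:
      case GL_SRC_ALPHA_SATURATE:  return GL_ZERO;
      default:                     return factor;
      }
   }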
- */ - const bool integer = ctx->DrawBuffer->_IntegerBuffers & (0x1 << i); - - const unsigned blend_enabled = GFX_VER >= 6 ? - ctx->Color.BlendEnabled & (1 << i) : ctx->Color.BlendEnabled; - - /* _NEW_COLOR */ - if (ctx->Color.ColorLogicOpEnabled) { - GLenum rb_type = rb ? _mesa_get_format_datatype(rb->Format) - : GL_UNSIGNED_NORMALIZED; - WARN_ONCE(ctx->Color.LogicOp != GL_COPY && - rb_type != GL_UNSIGNED_NORMALIZED && - rb_type != GL_FLOAT, "Ignoring %s logic op on %s " - "renderbuffer\n", - _mesa_enum_to_string(ctx->Color.LogicOp), - _mesa_enum_to_string(rb_type)); - if (GFX_VER >= 8 || rb_type == GL_UNSIGNED_NORMALIZED) { - entry->LogicOpEnable = true; - entry->LogicOpFunction = ctx->Color._LogicOp; - } - } else if (blend_enabled && - ctx->Color._AdvancedBlendMode == BLEND_NONE - && (GFX_VER <= 5 || !integer)) { - GLenum eqRGB = ctx->Color.Blend[i].EquationRGB; - GLenum eqA = ctx->Color.Blend[i].EquationA; - GLenum srcRGB = ctx->Color.Blend[i].SrcRGB; - GLenum dstRGB = ctx->Color.Blend[i].DstRGB; - GLenum srcA = ctx->Color.Blend[i].SrcA; - GLenum dstA = ctx->Color.Blend[i].DstA; - - if (eqRGB == GL_MIN || eqRGB == GL_MAX) - srcRGB = dstRGB = GL_ONE; - - if (eqA == GL_MIN || eqA == GL_MAX) - srcA = dstA = GL_ONE; - - /* Due to hardware limitations, the destination may have information - * in an alpha channel even when the format specifies no alpha - * channel. In order to avoid getting any incorrect blending due to - * that alpha channel, coerce the blend factors to values that will - * not read the alpha channel, but will instead use the correct - * implicit value for alpha. - */ - if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat, - GL_TEXTURE_ALPHA_TYPE)) { - srcRGB = brw_fix_xRGB_alpha(srcRGB); - srcA = brw_fix_xRGB_alpha(srcA); - dstRGB = brw_fix_xRGB_alpha(dstRGB); - dstA = brw_fix_xRGB_alpha(dstA); - } - - /* From the BLEND_STATE docs, DWord 0, Bit 29 (AlphaToOne Enable): - * "If Dual Source Blending is enabled, this bit must be disabled." - * - * We override SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO, - * and leave it enabled anyway. - */ - if (GFX_VER >= 6 && ctx->Color._BlendUsesDualSrc & (1 << i) && alpha_to_one) { - srcRGB = fix_dual_blend_alpha_to_one(srcRGB); - srcA = fix_dual_blend_alpha_to_one(srcA); - dstRGB = fix_dual_blend_alpha_to_one(dstRGB); - dstA = fix_dual_blend_alpha_to_one(dstA); - } - - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - - /* The Dual Source Blending documentation says: - * - * "If SRC1 is included in a src/dst blend factor and - * a DualSource RT Write message is not used, results - * are UNDEFINED. (This reflects the same restriction in DX APIs, - * where undefined results are produced if “o1” is not written - * by a PS – there are no default values defined). - * If SRC1 is not included in a src/dst blend factor, - * dual source blending must be disabled." - * - * There is no way to gracefully fix this undefined situation - * so we just disable the blending to prevent possible issues. 
- */ - entry->ColorBufferBlendEnable = - !(ctx->Color._BlendUsesDualSrc & 0x1) || wm_prog_data->dual_src_blend; - - entry->DestinationBlendFactor = blend_factor(dstRGB); - entry->SourceBlendFactor = blend_factor(srcRGB); - entry->DestinationAlphaBlendFactor = blend_factor(dstA); - entry->SourceAlphaBlendFactor = blend_factor(srcA); - entry->ColorBlendFunction = blend_eqn(eqRGB); - entry->AlphaBlendFunction = blend_eqn(eqA); - - if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) - independent_alpha_blend = true; - } - - return independent_alpha_blend; -} - -#if GFX_VER >= 6 -static void -genX(upload_blend_state)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - int size; - - /* We need at least one BLEND_STATE written, because we might do - * thread dispatch even if _NumColorDrawBuffers is 0 (for example - * for computed depth or alpha test), which will do an FB write - * with render target 0, which will reference BLEND_STATE[0] for - * alpha test enable. - */ - int nr_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers; - if (nr_draw_buffers == 0 && ctx->Color.AlphaEnabled) - nr_draw_buffers = 1; - - size = GENX(BLEND_STATE_ENTRY_length) * 4 * nr_draw_buffers; -#if GFX_VER >= 8 - size += GENX(BLEND_STATE_length) * 4; -#endif - - uint32_t *blend_map; - blend_map = brw_state_batch(brw, size, 64, &brw->cc.blend_state_offset); - -#if GFX_VER >= 8 - struct GENX(BLEND_STATE) blend = { 0 }; - { -#else - for (int i = 0; i < nr_draw_buffers; i++) { - struct GENX(BLEND_STATE_ENTRY) entry = { 0 }; -#define blend entry -#endif - /* OpenGL specification 3.3 (page 196), section 4.1.3 says: - * "If drawbuffer zero is not NONE and the buffer it references has an - * integer format, the SAMPLE_ALPHA_TO_COVERAGE and SAMPLE_ALPHA_TO_ONE - * operations are skipped." - */ - if (!(ctx->DrawBuffer->_IntegerBuffers & 0x1)) { - /* _NEW_MULTISAMPLE */ - if (_mesa_is_multisample_enabled(ctx)) { - if (ctx->Multisample.SampleAlphaToCoverage) { - blend.AlphaToCoverageEnable = true; - blend.AlphaToCoverageDitherEnable = GFX_VER >= 7; - } - if (ctx->Multisample.SampleAlphaToOne) - blend.AlphaToOneEnable = true; - } - - /* _NEW_COLOR */ - if (ctx->Color.AlphaEnabled) { - blend.AlphaTestEnable = true; - blend.AlphaTestFunction = - brw_translate_compare_func(ctx->Color.AlphaFunc); - } - - if (ctx->Color.DitherFlag) { - blend.ColorDitherEnable = true; - } - } - -#if GFX_VER >= 8 - for (int i = 0; i < nr_draw_buffers; i++) { - struct GENX(BLEND_STATE_ENTRY) entry = { 0 }; -#else - { -#endif - blend.IndependentAlphaBlendEnable = - set_blend_entry_bits(brw, &entry, i, blend.AlphaToOneEnable) || - blend.IndependentAlphaBlendEnable; - - /* See section 8.1.6 "Pre-Blend Color Clamping" of the - * SandyBridge PRM Volume 2 Part 1 for HW requirements. - * - * We do our ARB_color_buffer_float CLAMP_FRAGMENT_COLOR - * clamping in the fragment shader. For its clamping of - * blending, the spec says: - * - * "RESOLVED: For fixed-point color buffers, the inputs and - * the result of the blending equation are clamped. For - * floating-point color buffers, no clamping occurs." - * - * So, generally, we want clamping to the render target's range. - * And, good news, the hardware tables for both pre- and - * post-blend color clamping are either ignored, or any are - * allowed, or clamping is required but RT range clamping is a - * valid option. 
- */ - entry.PreBlendColorClampEnable = true; - entry.PostBlendColorClampEnable = true; - entry.ColorClampRange = COLORCLAMP_RTFORMAT; - - entry.WriteDisableRed = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 0); - entry.WriteDisableGreen = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 1); - entry.WriteDisableBlue = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 2); - entry.WriteDisableAlpha = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 3); - -#if GFX_VER >= 8 - GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[1 + i * 2], &entry); -#else - GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[i * 2], &entry); -#endif - } - } - -#if GFX_VER >= 8 - GENX(BLEND_STATE_pack)(NULL, blend_map, &blend); -#endif - -#if GFX_VER < 7 - brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) { - ptr.PointertoBLEND_STATE = brw->cc.blend_state_offset; - ptr.BLEND_STATEChange = true; - } -#else - brw_batch_emit(brw, GENX(3DSTATE_BLEND_STATE_POINTERS), ptr) { - ptr.BlendStatePointer = brw->cc.blend_state_offset; -#if GFX_VER >= 8 - ptr.BlendStatePointerValid = true; -#endif - } -#endif -} - -UNUSED static const struct brw_tracked_state genX(blend_state) = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_COLOR | - _NEW_MULTISAMPLE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_STATE_BASE_ADDRESS, - }, - .emit = genX(upload_blend_state), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 7 -UNUSED static const uint32_t push_constant_opcodes[] = { - [MESA_SHADER_VERTEX] = 21, - [MESA_SHADER_TESS_CTRL] = 25, /* HS */ - [MESA_SHADER_TESS_EVAL] = 26, /* DS */ - [MESA_SHADER_GEOMETRY] = 22, - [MESA_SHADER_FRAGMENT] = 23, - [MESA_SHADER_COMPUTE] = 0, -}; - -static void -genX(upload_push_constant_packets)(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - - UNUSED uint32_t mocs = brw_mocs(&brw->isl_dev, NULL); - - struct brw_stage_state *stage_states[] = { - &brw->vs.base, - &brw->tcs.base, - &brw->tes.base, - &brw->gs.base, - &brw->wm.base, - }; - - - if (GFX_VERx10 == 70 && - devinfo->platform == INTEL_PLATFORM_IVB && - stage_states[MESA_SHADER_VERTEX]->push_constants_dirty) - gfx7_emit_vs_workaround_flush(brw); - - for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { - struct brw_stage_state *stage_state = stage_states[stage]; - UNUSED struct gl_program *prog = ctx->_Shader->CurrentProgram[stage]; - - if (!stage_state->push_constants_dirty) - continue; - - brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), pkt) { - pkt._3DCommandSubOpcode = push_constant_opcodes[stage]; -#if GFX_VER >= 9 - pkt.MOCS = mocs; -#elif GFX_VER < 8 - /* MOCS is MBZ on Gfx8 so we skip it there */ - pkt.ConstantBody.MOCS = mocs; -#endif - if (stage_state->prog_data) { -#if GFX_VERx10 >= 75 - /* The Skylake PRM contains the following restriction: - * - * "The driver must ensure The following case does not occur - * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with - * buffer 3 read length equal to zero committed followed by a - * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to - * zero committed." - * - * To avoid this, we program the buffers in the highest slots. - * This way, slot 0 is only used if slot 3 is also used. 
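As an illustration of the highest-slots-first rule just quoted: the loop
below walks ubo_ranges from index 3 downward, so with two live UBO ranges
plus a push-constant segment, ReadLength[3] and ReadLength[2] carry the
UBOs, ReadLength[1] carries the push constants, and slot 0 stays empty,
satisfying the PRM restriction. A minimal sketch of just the slot ordering
(made-up helper; buffer addresses omitted, read_len assumed zeroed by the
caller):

   /* Fill constant-buffer read lengths from slot 3 downward so slot 0
    * is used only when all four slots are needed.
    */
   static void pack_highest_slots_first(const unsigned range_len[4],
                                        unsigned push_len,
                                        unsigned read_len[4])
   {
      int slot = 3;
      for (int i = 3; i >= 0; i--) {
         if (range_len[i] == 0)
            continue;               /* skip empty ranges, as above */
         read_len[slot--] = range_len[i];
      }
      if (push_len > 0 && slot >= 0)
         read_len[slot] = push_len; /* push constants take the lowest
                                     * slot actually used */
   }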
- */ - int n = 3; - - for (int i = 3; i >= 0; i--) { - const struct brw_ubo_range *range = - &stage_state->prog_data->ubo_ranges[i]; - - if (range->length == 0) - continue; - - const struct gl_uniform_block *block = - prog->sh.UniformBlocks[range->block]; - const struct gl_buffer_binding *binding = - &ctx->UniformBufferBindings[block->Binding]; - - if (!binding->BufferObject) { - static unsigned msg_id = 0; - _mesa_gl_debugf(ctx, &msg_id, MESA_DEBUG_SOURCE_API, - MESA_DEBUG_TYPE_UNDEFINED, - MESA_DEBUG_SEVERITY_HIGH, - "UBO %d unbound, %s shader uniform data " - "will be undefined.", - range->block, - _mesa_shader_stage_to_string(stage)); - continue; - } - - assert(binding->Offset % 32 == 0); - - struct brw_bo *bo = brw_bufferobj_buffer(brw, - brw_buffer_object(binding->BufferObject), - binding->Offset, range->length * 32, false); - - pkt.ConstantBody.ReadLength[n] = range->length; - pkt.ConstantBody.Buffer[n] = - ro_bo(bo, range->start * 32 + binding->Offset); - n--; - } - - if (stage_state->push_const_size > 0) { - assert(n >= 0); - pkt.ConstantBody.ReadLength[n] = stage_state->push_const_size; - pkt.ConstantBody.Buffer[n] = - ro_bo(stage_state->push_const_bo, - stage_state->push_const_offset); - } -#else - pkt.ConstantBody.ReadLength[0] = stage_state->push_const_size; - pkt.ConstantBody.Buffer[0].offset = stage_state->push_const_offset; -#endif - } - } - - stage_state->push_constants_dirty = false; - brw->ctx.NewDriverState |= GFX_VER >= 9 ? BRW_NEW_SURFACES : 0; - } -} - -const struct brw_tracked_state genX(push_constant_packets) = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_DRAW_CALL, - }, - .emit = genX(upload_push_constant_packets), -}; -#endif - -#if GFX_VER >= 6 -static void -genX(upload_vs_push_constants)(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->vs.base; - - /* BRW_NEW_VERTEX_PROGRAM */ - const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX]; - /* BRW_NEW_VS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data; - - gfx6_upload_push_constants(brw, vp, prog_data, stage_state); -} - -static const struct brw_tracked_state genX(vs_push_constants) = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS | - _NEW_TRANSFORM, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_VS_PROG_DATA, - }, - .emit = genX(upload_vs_push_constants), -}; - -static void -genX(upload_gs_push_constants)(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->gs.base; - - /* BRW_NEW_GEOMETRY_PROGRAM */ - const struct gl_program *gp = brw->programs[MESA_SHADER_GEOMETRY]; - - /* BRW_NEW_GS_PROG_DATA */ - struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data; - - gfx6_upload_push_constants(brw, gp, prog_data, stage_state); -} - -static const struct brw_tracked_state genX(gs_push_constants) = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS | - _NEW_TRANSFORM, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_GS_PROG_DATA, - }, - .emit = genX(upload_gs_push_constants), -}; - -static void -genX(upload_wm_push_constants)(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->wm.base; - /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT]; - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data; - - gfx6_upload_push_constants(brw, fp, prog_data, stage_state); -} - -static const struct brw_tracked_state genX(wm_push_constants) = { - .dirty = { - .mesa = 
_NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_FS_PROG_DATA, - }, - .emit = genX(upload_wm_push_constants), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 6 -static unsigned -genX(determine_sample_mask)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - float coverage = 1.0f; - float coverage_invert = false; - unsigned sample_mask = ~0u; - - /* BRW_NEW_NUM_SAMPLES */ - unsigned num_samples = brw->num_samples; - - if (_mesa_is_multisample_enabled(ctx)) { - if (ctx->Multisample.SampleCoverage) { - coverage = ctx->Multisample.SampleCoverageValue; - coverage_invert = ctx->Multisample.SampleCoverageInvert; - } - if (ctx->Multisample.SampleMask) { - sample_mask = ctx->Multisample.SampleMaskValue; - } - } - - if (num_samples > 1) { - int coverage_int = (int) (num_samples * coverage + 0.5f); - uint32_t coverage_bits = (1 << coverage_int) - 1; - if (coverage_invert) - coverage_bits ^= (1 << num_samples) - 1; - return coverage_bits & sample_mask; - } else { - return 1; - } -} - -static void -genX(emit_3dstate_multisample2)(struct brw_context *brw, - unsigned num_samples) -{ - unsigned log2_samples = ffs(num_samples) - 1; - - brw_batch_emit(brw, GENX(3DSTATE_MULTISAMPLE), multi) { - multi.PixelLocation = CENTER; - multi.NumberofMultisamples = log2_samples; -#if GFX_VER == 6 - INTEL_SAMPLE_POS_4X(multi.Sample); -#elif GFX_VER == 7 - switch (num_samples) { - case 1: - INTEL_SAMPLE_POS_1X(multi.Sample); - break; - case 2: - INTEL_SAMPLE_POS_2X(multi.Sample); - break; - case 4: - INTEL_SAMPLE_POS_4X(multi.Sample); - break; - case 8: - INTEL_SAMPLE_POS_8X(multi.Sample); - break; - default: - break; - } -#endif - } -} - -static void -genX(upload_multisample_state)(struct brw_context *brw) -{ - assert(brw->num_samples > 0 && brw->num_samples <= 16); - - genX(emit_3dstate_multisample2)(brw, brw->num_samples); - - brw_batch_emit(brw, GENX(3DSTATE_SAMPLE_MASK), sm) { - sm.SampleMask = genX(determine_sample_mask)(brw); - } -} - -static const struct brw_tracked_state genX(multisample_state) = { - .dirty = { - .mesa = _NEW_MULTISAMPLE | - (GFX_VER == 10 ? 
_NEW_BUFFERS : 0),
-      .brw = BRW_NEW_BLORP |
-             BRW_NEW_CONTEXT |
-             BRW_NEW_NUM_SAMPLES,
-   },
-   .emit = genX(upload_multisample_state)
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-static void
-genX(upload_color_calc_state)(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-
-   brw_state_emit(brw, GENX(COLOR_CALC_STATE), 64, &brw->cc.state_offset, cc) {
-#if GFX_VER <= 5
-      cc.IndependentAlphaBlendEnable =
-         set_blend_entry_bits(brw, &cc, 0, false);
-      set_depth_stencil_bits(brw, &cc);
-
-      if (ctx->Color.AlphaEnabled &&
-          ctx->DrawBuffer->_NumColorDrawBuffers <= 1) {
-         cc.AlphaTestEnable = true;
-         cc.AlphaTestFunction =
-            brw_translate_compare_func(ctx->Color.AlphaFunc);
-      }
-
-      cc.ColorDitherEnable = ctx->Color.DitherFlag;
-
-      cc.StatisticsEnable = brw->stats_wm;
-
-      cc.CCViewportStatePointer =
-         ro_bo(brw->batch.state.bo, brw->cc.vp_offset);
-#else
-      /* _NEW_COLOR */
-      cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];
-      cc.BlendConstantColorGreen = ctx->Color.BlendColorUnclamped[1];
-      cc.BlendConstantColorBlue = ctx->Color.BlendColorUnclamped[2];
-      cc.BlendConstantColorAlpha = ctx->Color.BlendColorUnclamped[3];
-
-#if GFX_VER < 9
-      /* _NEW_STENCIL */
-      cc.StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0);
-      cc.BackfaceStencilReferenceValue =
-         _mesa_get_stencil_ref(ctx, ctx->Stencil._BackFace);
-#endif
-
-#endif
-
-      /* _NEW_COLOR */
-      UNCLAMPED_FLOAT_TO_UBYTE(cc.AlphaReferenceValueAsUNORM8,
-                               ctx->Color.AlphaRef);
-   }
-
-#if GFX_VER >= 6
-   brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
-      ptr.ColorCalcStatePointer = brw->cc.state_offset;
-#if GFX_VER != 7
-      ptr.ColorCalcStatePointerValid = true;
-#endif
-   }
-#else
-   brw->ctx.NewDriverState |= BRW_NEW_GFX4_UNIT_STATE;
-#endif
-}
-
-UNUSED static const struct brw_tracked_state genX(color_calc_state) = {
-   .dirty = {
-      .mesa = _NEW_COLOR |
-              _NEW_STENCIL |
-              (GFX_VER <= 5 ? _NEW_BUFFERS |
-                              _NEW_DEPTH
-                            : 0),
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             (GFX_VER <= 5 ? BRW_NEW_CC_VP |
-                             BRW_NEW_STATS_WM
-                           : BRW_NEW_CC_STATE |
-                             BRW_NEW_STATE_BASE_ADDRESS),
-   },
-   .emit = genX(upload_color_calc_state),
-};
-
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VERx10 == 75
-static void
-genX(upload_color_calc_and_blend_state)(struct brw_context *brw)
-{
-   genX(upload_blend_state)(brw);
-   genX(upload_color_calc_state)(brw);
-}
-
-/* On Haswell, when BLEND_STATE is emitted, CC_STATE should also be
- * re-emitted; this works around the flickering shadows seen in several
- * games.
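Running genX(determine_sample_mask)() from a bit earlier in this hunk
through a concrete case: on an 8-sample framebuffer,
glSampleCoverage(0.5, GL_TRUE) yields coverage_int = 4, coverage_bits =
0x0F, and the invert flips that to 0xF0 within the valid samples before it
is ANDed with any glSampleMaski() mask. The same arithmetic as a standalone
sketch (illustrative function name):

   #include <stdbool.h>
   #include <stdint.h>

   /* Coverage -> sample-mask math for num_samples > 1: round coverage
    * to a sample count, build that many low bits, optionally invert
    * within the valid samples, then apply the API-level sample mask.
    */
   static uint32_t coverage_to_sample_mask(unsigned num_samples,
                                           float coverage, bool invert,
                                           uint32_t api_mask)
   {
      int coverage_int = (int) (num_samples * coverage + 0.5f);
      uint32_t bits = (1u << coverage_int) - 1;
      if (invert)
         bits ^= (1u << num_samples) - 1;
      return bits & api_mask;
   }

   /* coverage_to_sample_mask(8, 0.5f, true, ~0u) == 0xF0 */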
- */ -static const struct brw_tracked_state genX(cc_and_blend_state) = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_COLOR | - _NEW_STENCIL | - _NEW_MULTISAMPLE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_CC_STATE | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_STATE_BASE_ADDRESS, - }, - .emit = genX(upload_color_calc_and_blend_state), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 7 -static void -genX(upload_sbe)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_FRAGMENT_PROGRAM */ - UNUSED const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT]; - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); -#if GFX_VER >= 8 - struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = { { 0 } }; -#else -#define attr_overrides sbe.Attribute -#endif - uint32_t urb_entry_read_length; - uint32_t urb_entry_read_offset; - uint32_t point_sprite_enables; - - brw_batch_emit(brw, GENX(3DSTATE_SBE), sbe) { - sbe.AttributeSwizzleEnable = true; - sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs; - - /* _NEW_BUFFERS */ - bool flip_y = ctx->DrawBuffer->FlipY; - - /* _NEW_POINT - * - * Window coordinates in an FBO are inverted, which means point - * sprite origin must be inverted. - */ - if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == flip_y) - sbe.PointSpriteTextureCoordinateOrigin = LOWERLEFT; - else - sbe.PointSpriteTextureCoordinateOrigin = UPPERLEFT; - - /* _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM, - * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | - * BRW_NEW_GS_PROG_DATA | BRW_NEW_PRIMITIVE | BRW_NEW_TES_PROG_DATA | - * BRW_NEW_VUE_MAP_GEOM_OUT - */ - genX(calculate_attr_overrides)(brw, - attr_overrides, - &point_sprite_enables, - &urb_entry_read_length, - &urb_entry_read_offset); - - /* Typically, the URB entry read length and offset should be programmed - * in 3DSTATE_VS and 3DSTATE_GS; SBE inherits it from the last active - * stage which produces geometry. However, we don't know the proper - * value until we call calculate_attr_overrides(). - * - * To fit with our existing code, we override the inherited values and - * specify it here directly, as we did on previous generations. - */ - sbe.VertexURBEntryReadLength = urb_entry_read_length; - sbe.VertexURBEntryReadOffset = urb_entry_read_offset; - sbe.PointSpriteTextureCoordinateEnable = point_sprite_enables; - sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs; - -#if GFX_VER >= 8 - sbe.ForceVertexURBEntryReadLength = true; - sbe.ForceVertexURBEntryReadOffset = true; -#endif - -#if GFX_VER >= 9 - /* prepare the active component dwords */ - for (int i = 0; i < 32; i++) - sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW; -#endif - } - -#if GFX_VER >= 8 - brw_batch_emit(brw, GENX(3DSTATE_SBE_SWIZ), sbes) { - for (int i = 0; i < 16; i++) - sbes.Attribute[i] = attr_overrides[i]; - } -#endif - -#undef attr_overrides -} - -static const struct brw_tracked_state genX(sbe_state) = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_LIGHT | - _NEW_POINT | - _NEW_POLYGON | - _NEW_PROGRAM, - .brw = BRW_NEW_BLORP | - BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_GS_PROG_DATA | - BRW_NEW_TES_PROG_DATA | - BRW_NEW_VUE_MAP_GEOM_OUT | - (GFX_VER == 7 ? 
BRW_NEW_PRIMITIVE - : 0), - }, - .emit = genX(upload_sbe), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 7 -/** - * Outputs the 3DSTATE_SO_DECL_LIST command. - * - * The data output is a series of 64-bit entries containing a SO_DECL per - * stream. We only have one stream of rendering coming out of the GS unit, so - * we only emit stream 0 (low 16 bits) SO_DECLs. - */ -static void -genX(upload_3dstate_so_decl_list)(struct brw_context *brw, - const struct brw_vue_map *vue_map) -{ - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_TRANSFORM_FEEDBACK */ - struct gl_transform_feedback_object *xfb_obj = - ctx->TransformFeedback.CurrentObject; - const struct gl_transform_feedback_info *linked_xfb_info = - xfb_obj->program->sh.LinkedTransformFeedback; - struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128]; - int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; - int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; - int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; - int max_decls = 0; - STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS); - - memset(so_decl, 0, sizeof(so_decl)); - - /* Construct the list of SO_DECLs to be emitted. The formatting of the - * command feels strange -- each dword pair contains a SO_DECL per stream. - */ - for (unsigned i = 0; i < linked_xfb_info->NumOutputs; i++) { - const struct gl_transform_feedback_output *output = - &linked_xfb_info->Outputs[i]; - const int buffer = output->OutputBuffer; - const int varying = output->OutputRegister; - const unsigned stream_id = output->StreamId; - assert(stream_id < MAX_VERTEX_STREAMS); - - buffer_mask[stream_id] |= 1 << buffer; - - assert(vue_map->varying_to_slot[varying] >= 0); - - /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[] - * array. Instead, it simply increments DstOffset for the following - * input by the number of components that should be skipped. - * - * Our hardware is unusual in that it requires us to program SO_DECLs - * for fake "hole" components, rather than simply taking the offset - * for each real varying. Each hole can have size 1, 2, 3, or 4; we - * program as many size = 4 holes as we can, then a final hole to - * accommodate the final 1, 2, or 3 remaining. 
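To see the hole splitting described above in numbers: a gap of 7 components
(for example from gl_SkipComponents) becomes two HoleFlag SO_DECLs, a
4-component one (mask 0xF) and a 3-component one (mask 0x7), because
ComponentMask can cover at most 4 components. A runnable trace of the loop
that follows:

   #include <stdio.h>

   #define MIN2(a, b) ((a) < (b) ? (a) : (b))

   /* Prints the ComponentMask of each hole SO_DECL the loop below would
    * emit for a given component gap; a skip of 7 -> 0xF then 0x7.
    */
   static void trace_hole_masks(int skip_components)
   {
      while (skip_components > 0) {
         unsigned mask = (1u << MIN2(skip_components, 4)) - 1;
         printf("hole ComponentMask = 0x%X\n", mask);
         skip_components -= 4;
      }
   }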
- */ - int skip_components = output->DstOffset - next_offset[buffer]; - - while (skip_components > 0) { - so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) { - .HoleFlag = 1, - .OutputBufferSlot = output->OutputBuffer, - .ComponentMask = (1 << MIN2(skip_components, 4)) - 1, - }; - skip_components -= 4; - } - - next_offset[buffer] = output->DstOffset + output->NumComponents; - - so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) { - .OutputBufferSlot = output->OutputBuffer, - .RegisterIndex = vue_map->varying_to_slot[varying], - .ComponentMask = - ((1 << output->NumComponents) - 1) << output->ComponentOffset, - }; - - if (decls[stream_id] > max_decls) - max_decls = decls[stream_id]; - } - - uint32_t *dw; - dw = brw_batch_emitn(brw, GENX(3DSTATE_SO_DECL_LIST), 3 + 2 * max_decls, - .StreamtoBufferSelects0 = buffer_mask[0], - .StreamtoBufferSelects1 = buffer_mask[1], - .StreamtoBufferSelects2 = buffer_mask[2], - .StreamtoBufferSelects3 = buffer_mask[3], - .NumEntries0 = decls[0], - .NumEntries1 = decls[1], - .NumEntries2 = decls[2], - .NumEntries3 = decls[3]); - - for (int i = 0; i < max_decls; i++) { - GENX(SO_DECL_ENTRY_pack)( - brw, dw + 2 + i * 2, - &(struct GENX(SO_DECL_ENTRY)) { - .Stream0Decl = so_decl[0][i], - .Stream1Decl = so_decl[1][i], - .Stream2Decl = so_decl[2][i], - .Stream3Decl = so_decl[3][i], - }); - } -} - -static void -genX(upload_3dstate_so_buffers)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_TRANSFORM_FEEDBACK */ - struct gl_transform_feedback_object *xfb_obj = - ctx->TransformFeedback.CurrentObject; -#if GFX_VER < 8 - const struct gl_transform_feedback_info *linked_xfb_info = - xfb_obj->program->sh.LinkedTransformFeedback; -#else - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) xfb_obj; -#endif - - /* Set up the up to 4 output buffers. These are the ranges defined in the - * gl_transform_feedback_object. - */ - for (int i = 0; i < 4; i++) { - struct brw_buffer_object *bufferobj = - brw_buffer_object(xfb_obj->Buffers[i]); - uint32_t start = xfb_obj->Offset[i]; - uint32_t end = ALIGN(start + xfb_obj->Size[i], 4); - uint32_t const size = end - start; - - if (!bufferobj || !size) { - brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) { - sob.SOBufferIndex = i; - sob.MOCS = brw_mocs(&brw->isl_dev, NULL); - } - continue; - } - - assert(start % 4 == 0); - struct brw_bo *bo = - brw_bufferobj_buffer(brw, bufferobj, start, size, true); - assert(end <= bo->size); - - brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) { - sob.SOBufferIndex = i; - - sob.SurfaceBaseAddress = rw_bo(bo, start); - sob.MOCS = brw_mocs(&brw->isl_dev, bo); -#if GFX_VER < 8 - sob.SurfacePitch = linked_xfb_info->Buffers[i].Stride * 4; - sob.SurfaceEndAddress = rw_bo(bo, end); -#else - sob.SOBufferEnable = true; - sob.StreamOffsetWriteEnable = true; - sob.StreamOutputBufferOffsetAddressEnable = true; - - sob.SurfaceSize = MAX2(xfb_obj->Size[i] / 4, 1) - 1; - sob.StreamOutputBufferOffsetAddress = - rw_bo(brw_obj->offset_bo, i * sizeof(uint32_t)); - - if (brw_obj->zero_offsets) { - /* Zero out the offset and write that to offset_bo */ - sob.StreamOffset = 0; - } else { - /* Use offset_bo as the "Stream Offset." 
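- * The magic value 0xFFFFFFFF written below is understood to make the
- * hardware reload the offset from StreamOutputBufferOffsetAddress,
- * giving append semantics when streamout resumes in a later batch.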
             */
-            sob.StreamOffset = 0xFFFFFFFF;
-         }
-#endif
-      }
-   }
-
-#if GFX_VER >= 8
-   brw_obj->zero_offsets = false;
-#endif
-}
-
-static bool
-query_active(struct gl_query_object *q)
-{
-   return q && q->Active;
-}
-
-static void
-genX(upload_3dstate_streamout)(struct brw_context *brw, bool active,
-                               const struct brw_vue_map *vue_map)
-{
-   struct gl_context *ctx = &brw->ctx;
-   /* BRW_NEW_TRANSFORM_FEEDBACK */
-   struct gl_transform_feedback_object *xfb_obj =
-      ctx->TransformFeedback.CurrentObject;
-
-   brw_batch_emit(brw, GENX(3DSTATE_STREAMOUT), sos) {
-      if (active) {
-         int urb_entry_read_offset = 0;
-         int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
-            urb_entry_read_offset;
-
-         sos.SOFunctionEnable = true;
-         sos.SOStatisticsEnable = true;
-
-         /* BRW_NEW_RASTERIZER_DISCARD */
-         if (ctx->RasterDiscard) {
-            if (!query_active(ctx->Query.PrimitivesGenerated[0])) {
-               sos.RenderingDisable = true;
-            } else {
-               perf_debug("Rasterizer discard with a GL_PRIMITIVES_GENERATED "
-                          "query active relies on the clipper.\n");
-            }
-         }
-
-         /* _NEW_LIGHT */
-         if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
-            sos.ReorderMode = TRAILING;
-
-#if GFX_VER < 8
-         sos.SOBufferEnable0 = xfb_obj->Buffers[0] != NULL;
-         sos.SOBufferEnable1 = xfb_obj->Buffers[1] != NULL;
-         sos.SOBufferEnable2 = xfb_obj->Buffers[2] != NULL;
-         sos.SOBufferEnable3 = xfb_obj->Buffers[3] != NULL;
-#else
-         const struct gl_transform_feedback_info *linked_xfb_info =
-            xfb_obj->program->sh.LinkedTransformFeedback;
-         /* Set buffer pitches; 0 means unbound. */
-         if (xfb_obj->Buffers[0])
-            sos.Buffer0SurfacePitch = linked_xfb_info->Buffers[0].Stride * 4;
-         if (xfb_obj->Buffers[1])
-            sos.Buffer1SurfacePitch = linked_xfb_info->Buffers[1].Stride * 4;
-         if (xfb_obj->Buffers[2])
-            sos.Buffer2SurfacePitch = linked_xfb_info->Buffers[2].Stride * 4;
-         if (xfb_obj->Buffers[3])
-            sos.Buffer3SurfacePitch = linked_xfb_info->Buffers[3].Stride * 4;
-#endif
-
-         /* We always read the whole vertex. This could be reduced at some
-          * point by reading less and offsetting the register index in the
-          * SO_DECLs.
-          */
-         sos.Stream0VertexReadOffset = urb_entry_read_offset;
-         sos.Stream0VertexReadLength = urb_entry_read_length - 1;
-         sos.Stream1VertexReadOffset = urb_entry_read_offset;
-         sos.Stream1VertexReadLength = urb_entry_read_length - 1;
-         sos.Stream2VertexReadOffset = urb_entry_read_offset;
-         sos.Stream2VertexReadLength = urb_entry_read_length - 1;
-         sos.Stream3VertexReadOffset = urb_entry_read_offset;
-         sos.Stream3VertexReadLength = urb_entry_read_length - 1;
-      }
-   }
-}
-
-static void
-genX(upload_sol)(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-   /* BRW_NEW_TRANSFORM_FEEDBACK */
-   bool active = _mesa_is_xfb_active_and_unpaused(ctx);
-
-   if (active) {
-      genX(upload_3dstate_so_buffers)(brw);
-
-      /* BRW_NEW_VUE_MAP_GEOM_OUT */
-      genX(upload_3dstate_so_decl_list)(brw, &brw->vue_map_geom_out);
-   }
-
-   /* Finally, set up the SOL stage. This command must always follow updates
-    * to the nonpipelined SOL state (3DSTATE_SO_BUFFER, 3DSTATE_SO_DECL_LIST)
-    * or MMIO register updates (currently performed by the kernel at each
-    * batch emit).
-    */
-   genX(upload_3dstate_streamout)(brw, active, &brw->vue_map_geom_out);
-}
-
-static const struct brw_tracked_state genX(sol_state) = {
-   .dirty = {
-      .mesa = _NEW_LIGHT,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_RASTERIZER_DISCARD |
-             BRW_NEW_VUE_MAP_GEOM_OUT |
-             BRW_NEW_TRANSFORM_FEEDBACK,
-   },
-   .emit = genX(upload_sol),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 7
-static void
-genX(upload_ps)(struct brw_context *brw)
-{
-   UNUSED const struct gl_context *ctx = &brw->ctx;
-   UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   /* BRW_NEW_FS_PROG_DATA */
-   const struct brw_wm_prog_data *prog_data =
-      brw_wm_prog_data(brw->wm.base.prog_data);
-   const struct brw_stage_state *stage_state = &brw->wm.base;
-
-   brw_batch_emit(brw, GENX(3DSTATE_PS), ps) {
-      /* Initialize the execution mask with VMask. Otherwise, derivatives are
-       * incorrect for subspans where some of the pixels are unlit. We believe
-       * the bit just didn't take effect in previous generations.
-       */
-      ps.VectorMaskEnable = GFX_VER >= 8;
-
-      /* Wa_1606682166:
-       * "Incorrect TDL's SSP address shift in SARB for 16:6 & 18:8 modes.
-       * Disable the Sampler state prefetch functionality in the SARB by
-       * programming 0xB000[30] to '1'."
-       */
-      ps.SamplerCount = GFX_VER == 11 ?
-         0 : DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);
-
-      /* BRW_NEW_FS_PROG_DATA */
-      ps.BindingTableEntryCount = prog_data->base.binding_table.size_bytes / 4;
-
-      if (prog_data->base.use_alt_mode)
-         ps.FloatingPointMode = Alternate;
-
-      /* Haswell requires the sample mask to be set in this packet as well as
-       * in 3DSTATE_SAMPLE_MASK; the values should match.
-       */
-
-      /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
-#if GFX_VERx10 == 75
-      ps.SampleMask = genX(determine_sample_mask(brw));
-#endif
-
-      /* 3DSTATE_PS expects the number of threads per PSD, which is always 64
-       * for pre-Gfx11 and 128 for Gfx11+. On Gfx11+, a programmed value of k
-       * implies 2(k+1) threads. It implicitly scales for different GT levels
-       * (which have some # of PSDs).
-       *
-       * In Gfx8 the format is U8-2 whereas in Gfx9+ it is U9-1.
-       */
-#if GFX_VER >= 9
-      ps.MaximumNumberofThreadsPerPSD = 64 - 1;
-#elif GFX_VER >= 8
-      ps.MaximumNumberofThreadsPerPSD = 64 - 2;
-#else
-      ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
-#endif
-
-      if (prog_data->base.nr_params > 0 ||
-          prog_data->base.ubo_ranges[0].length > 0)
-         ps.PushConstantEnable = true;
-
-#if GFX_VER < 8
-      /* From the IVB PRM, volume 2 part 1, page 287:
-       * "This bit is inserted in the PS payload header and made available to
-       * the DataPort (either via the message header or via header bypass) to
-       * indicate that oMask data (one or two phases) is included in Render
-       * Target Write messages. If present, the oMask data is used to mask off
-       * samples."
-       */
-      ps.oMaskPresenttoRenderTarget = prog_data->uses_omask;
-
-      /* The hardware wedges if you have this bit set but don't turn on any
-       * dual source blend factors.
-       *
-       * BRW_NEW_FS_PROG_DATA | _NEW_COLOR
-       */
-      ps.DualSourceBlendEnable = prog_data->dual_src_blend &&
-                                 (ctx->Color.BlendEnabled & 1) &&
-                                 ctx->Color._BlendUsesDualSrc & 0x1;
-
-      /* BRW_NEW_FS_PROG_DATA */
-      ps.AttributeEnable = (prog_data->num_varying_inputs != 0);
-#endif
-
-      /* From the documentation for this packet:
-       * "If the PS kernel does not need the Position XY Offsets to
-       * compute a Position Value, then this field should be programmed
-       * to POSOFFSET_NONE."
- * - * "SW Recommendation: If the PS kernel needs the Position Offsets - * to compute a Position XY value, this field should match Position - * ZW Interpolation Mode to ensure a consistent position.xyzw - * computation." - * - * We only require XY sample offsets. So, this recommendation doesn't - * look useful at the moment. We might need this in future. - */ - if (prog_data->uses_pos_offset) - ps.PositionXYOffsetSelect = POSOFFSET_SAMPLE; - else - ps.PositionXYOffsetSelect = POSOFFSET_NONE; - - ps._8PixelDispatchEnable = prog_data->dispatch_8; - ps._16PixelDispatchEnable = prog_data->dispatch_16; - ps._32PixelDispatchEnable = prog_data->dispatch_32; - - /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable: - * - * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32 - * Dispatch must not be enabled for PER_PIXEL dispatch mode." - * - * Since 16x MSAA is first introduced on SKL, we don't need to apply - * the workaround on any older hardware. - * - * BRW_NEW_NUM_SAMPLES - */ - if (GFX_VER >= 9 && !prog_data->persample_dispatch && - brw->num_samples == 16) { - assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable); - ps._32PixelDispatchEnable = false; - } - - ps.DispatchGRFStartRegisterForConstantSetupData0 = - brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0); - ps.DispatchGRFStartRegisterForConstantSetupData1 = - brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1); - ps.DispatchGRFStartRegisterForConstantSetupData2 = - brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2); - - ps.KernelStartPointer0 = stage_state->prog_offset + - brw_wm_prog_data_prog_offset(prog_data, ps, 0); - ps.KernelStartPointer1 = stage_state->prog_offset + - brw_wm_prog_data_prog_offset(prog_data, ps, 1); - ps.KernelStartPointer2 = stage_state->prog_offset + - brw_wm_prog_data_prog_offset(prog_data, ps, 2); - - if (prog_data->base.total_scratch) { - ps.ScratchSpaceBasePointer = - rw_32_bo(stage_state->scratch_bo, - ffs(stage_state->per_thread_scratch) - 11); - } - } -} - -static const struct brw_tracked_state genX(ps_state) = { - .dirty = { - .mesa = _NEW_MULTISAMPLE | - (GFX_VER < 8 ? _NEW_BUFFERS | - _NEW_COLOR - : 0), - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_FS_PROG_DATA | - (GFX_VER >= 9 ? 
BRW_NEW_NUM_SAMPLES : 0),
-   },
-   .emit = genX(upload_ps),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 7
-static void
-genX(upload_hs_state)(struct brw_context *brw)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-   struct brw_stage_state *stage_state = &brw->tcs.base;
-   struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
-   const struct brw_vue_prog_data *vue_prog_data =
-      brw_vue_prog_data(stage_prog_data);
-
-   /* BRW_NEW_TCS_PROG_DATA */
-   struct brw_tcs_prog_data *tcs_prog_data =
-      brw_tcs_prog_data(stage_prog_data);
-
-   if (!tcs_prog_data) {
-      brw_batch_emit(brw, GENX(3DSTATE_HS), hs);
-   } else {
-      brw_batch_emit(brw, GENX(3DSTATE_HS), hs) {
-         INIT_THREAD_DISPATCH_FIELDS(hs, Vertex);
-
-         hs.InstanceCount = tcs_prog_data->instances - 1;
-         hs.IncludeVertexHandles = true;
-
-         hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
-
-#if GFX_VER >= 9
-         hs.DispatchMode = vue_prog_data->dispatch_mode;
-         hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id;
-#endif
-      }
-   }
-}
-
-static const struct brw_tracked_state genX(hs_state) = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_TCS_PROG_DATA |
-             BRW_NEW_TESS_PROGRAMS,
-   },
-   .emit = genX(upload_hs_state),
-};
-
-static void
-genX(upload_ds_state)(struct brw_context *brw)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-   const struct brw_stage_state *stage_state = &brw->tes.base;
-   struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
-
-   /* BRW_NEW_TES_PROG_DATA */
-   const struct brw_tes_prog_data *tes_prog_data =
-      brw_tes_prog_data(stage_prog_data);
-   const struct brw_vue_prog_data *vue_prog_data =
-      brw_vue_prog_data(stage_prog_data);
-
-   if (!tes_prog_data) {
-      brw_batch_emit(brw, GENX(3DSTATE_DS), ds);
-   } else {
-      assert(GFX_VER < 11 ||
-             vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8);
-
-      brw_batch_emit(brw, GENX(3DSTATE_DS), ds) {
-         INIT_THREAD_DISPATCH_FIELDS(ds, Patch);
-
-         ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
-         ds.ComputeWCoordinateEnable =
-            tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;
-
-#if GFX_VER >= 8
-         if (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8)
-            ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
-         ds.UserClipDistanceCullTestEnableBitmask =
-            vue_prog_data->cull_distance_mask;
-#endif
-      }
-   }
-}
-
-static const struct brw_tracked_state genX(ds_state) = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_TESS_PROGRAMS |
-             BRW_NEW_TES_PROG_DATA,
-   },
-   .emit = genX(upload_ds_state),
-};
-
-/* ---------------------------------------------------------------------- */
-
-static void
-upload_te_state(struct brw_context *brw)
-{
-   /* BRW_NEW_TESS_PROGRAMS */
-   bool active = brw->programs[MESA_SHADER_TESS_EVAL];
-
-   /* BRW_NEW_TES_PROG_DATA */
-   const struct brw_tes_prog_data *tes_prog_data =
-      brw_tes_prog_data(brw->tes.base.prog_data);
-
-   if (active) {
-      brw_batch_emit(brw, GENX(3DSTATE_TE), te) {
-         te.Partitioning = tes_prog_data->partitioning;
-         te.OutputTopology = tes_prog_data->output_topology;
-         te.TEDomain = tes_prog_data->domain;
-         te.TEEnable = true;
-         te.MaximumTessellationFactorOdd = 63.0;
-         te.MaximumTessellationFactorNotOdd = 64.0;
-      }
-   } else {
-      brw_batch_emit(brw, GENX(3DSTATE_TE), te);
-   }
-}
-
-static const struct brw_tracked_state genX(te_state) = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BLORP |
-             BRW_NEW_CONTEXT |
-             BRW_NEW_TES_PROG_DATA |
-
BRW_NEW_TESS_PROGRAMS, - }, - .emit = upload_te_state, -}; - -/* ---------------------------------------------------------------------- */ - -static void -genX(upload_tes_push_constants)(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->tes.base; - /* BRW_NEW_TESS_PROGRAMS */ - const struct gl_program *tep = brw->programs[MESA_SHADER_TESS_EVAL]; - - /* BRW_NEW_TES_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data; - gfx6_upload_push_constants(brw, tep, prog_data, stage_state); -} - -static const struct brw_tracked_state genX(tes_push_constants) = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_TESS_PROGRAMS | - BRW_NEW_TES_PROG_DATA, - }, - .emit = genX(upload_tes_push_constants), -}; - -static void -genX(upload_tcs_push_constants)(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->tcs.base; - /* BRW_NEW_TESS_PROGRAMS */ - const struct gl_program *tcp = brw->programs[MESA_SHADER_TESS_CTRL]; - - /* BRW_NEW_TCS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data; - - gfx6_upload_push_constants(brw, tcp, prog_data, stage_state); -} - -static const struct brw_tracked_state genX(tcs_push_constants) = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_DEFAULT_TESS_LEVELS | - BRW_NEW_TESS_PROGRAMS | - BRW_NEW_TCS_PROG_DATA, - }, - .emit = genX(upload_tcs_push_constants), -}; - -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 7 -static void -genX(upload_cs_push_constants)(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->cs.base; - - /* BRW_NEW_COMPUTE_PROGRAM */ - const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE]; - - if (cp) { - /* BRW_NEW_CS_PROG_DATA */ - struct brw_cs_prog_data *cs_prog_data = - brw_cs_prog_data(brw->cs.base.prog_data); - - _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_COMPUTE); - brw_upload_cs_push_constants(brw, cp, cs_prog_data, stage_state); - } -} - -const struct brw_tracked_state genX(cs_push_constants) = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_COMPUTE_PROGRAM | - BRW_NEW_CS_PROG_DATA, - }, - .emit = genX(upload_cs_push_constants), -}; - -/** - * Creates a new CS constant buffer reflecting the current CS program's - * constants, if needed by the CS program. 
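- *
- * (When the program needs no pull constants -- i.e. every uniform fits in
- * push space -- brw_upload_pull_constants() is expected to simply drop any
- * stale constant buffer rather than build a new one.)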
- */ -static void -genX(upload_cs_pull_constants)(struct brw_context *brw) -{ - struct brw_stage_state *stage_state = &brw->cs.base; - - /* BRW_NEW_COMPUTE_PROGRAM */ - struct brw_program *cp = - (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE]; - - /* BRW_NEW_CS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = brw->cs.base.prog_data; - - _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_COMPUTE); - /* _NEW_PROGRAM_CONSTANTS */ - brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &cp->program, - stage_state, prog_data); -} - -const struct brw_tracked_state genX(cs_pull_constants) = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_COMPUTE_PROGRAM | - BRW_NEW_CS_PROG_DATA, - }, - .emit = genX(upload_cs_pull_constants), -}; - -static void -genX(upload_cs_state)(struct brw_context *brw) -{ - if (!brw->cs.base.prog_data) - return; - - uint32_t offset; - uint32_t *desc = (uint32_t*) brw_state_batch( - brw, GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t), 64, - &offset); - - struct brw_stage_state *stage_state = &brw->cs.base; - struct brw_stage_prog_data *prog_data = stage_state->prog_data; - struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - const struct brw_cs_dispatch_info dispatch = - brw_cs_get_dispatch_info(devinfo, cs_prog_data, brw->compute.group_size); - - if (INTEL_DEBUG(DEBUG_SHADER_TIME)) { - brw_emit_buffer_surface_state( - brw, &stage_state->surf_offset[ - prog_data->binding_table.shader_time_start], - brw->shader_time.bo, 0, ISL_FORMAT_RAW, - brw->shader_time.bo->size, 1, - RELOC_WRITE); - } - - uint32_t *bind = brw_state_batch(brw, prog_data->binding_table.size_bytes, - 32, &stage_state->bind_bo_offset); - - /* The MEDIA_VFE_STATE documentation for Gfx8+ says: - * - * "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless - * the only bits that are changed are scoreboard related: Scoreboard - * Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta. For - * these scoreboard related states, a MEDIA_STATE_FLUSH is sufficient." - * - * Earlier generations say "MI_FLUSH" instead of "stalling PIPE_CONTROL", - * but MI_FLUSH isn't really a thing, so we assume they meant PIPE_CONTROL. - */ - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL); - - brw_batch_emit(brw, GENX(MEDIA_VFE_STATE), vfe) { - if (prog_data->total_scratch) { - uint32_t per_thread_scratch_value; - - if (GFX_VER >= 8) { - /* Broadwell's Per Thread Scratch Space is in the range [0, 11] - * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M. - */ - per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 11; - } else if (GFX_VERx10 == 75) { - /* Haswell's Per Thread Scratch Space is in the range [0, 10] - * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M. - */ - per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 12; - } else { - /* Earlier platforms use the range [0, 11] to mean [1kB, 12kB] - * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB. - */ - per_thread_scratch_value = stage_state->per_thread_scratch / 1024 - 1; - } - vfe.ScratchSpaceBasePointer = rw_32_bo(stage_state->scratch_bo, 0); - vfe.PerThreadScratchSpace = per_thread_scratch_value; - } - - vfe.MaximumNumberofThreads = - devinfo->max_cs_threads * devinfo->subslice_total - 1; - vfe.NumberofURBEntries = GFX_VER >= 8 ? 
2 : 0; -#if GFX_VER < 11 - vfe.ResetGatewayTimer = - Resettingrelativetimerandlatchingtheglobaltimestamp; -#endif -#if GFX_VER < 9 - vfe.BypassGatewayControl = BypassingOpenGatewayCloseGatewayprotocol; -#endif -#if GFX_VER == 7 - vfe.GPGPUMode = true; -#endif - - /* We are uploading duplicated copies of push constant uniforms for each - * thread. Although the local id data needs to vary per thread, it won't - * change for other uniform data. Unfortunately this duplication is - * required for gfx7. As of Haswell, this duplication can be avoided, - * but this older mechanism with duplicated data continues to work. - * - * FINISHME: As of Haswell, we could make use of the - * INTERFACE_DESCRIPTOR_DATA "Cross-Thread Constant Data Read Length" - * field to only store one copy of uniform data. - * - * FINISHME: Broadwell adds a new alternative "Indirect Payload Storage" - * which is described in the GPGPU_WALKER command and in the Broadwell - * PRM Volume 7: 3D Media GPGPU, under Media GPGPU Pipeline => Mode of - * Operations => GPGPU Mode => Indirect Payload Storage. - * - * Note: The constant data is built in brw_upload_cs_push_constants - * below. - */ - vfe.URBEntryAllocationSize = GFX_VER >= 8 ? 2 : 0; - - const uint32_t vfe_curbe_allocation = - ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads + - cs_prog_data->push.cross_thread.regs, 2); - vfe.CURBEAllocationSize = vfe_curbe_allocation; - } - - const unsigned push_const_size = - brw_cs_push_const_total_size(cs_prog_data, dispatch.threads); - if (push_const_size > 0) { - brw_batch_emit(brw, GENX(MEDIA_CURBE_LOAD), curbe) { - curbe.CURBETotalDataLength = ALIGN(push_const_size, 64); - curbe.CURBEDataStartAddress = stage_state->push_const_offset; - } - } - - /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */ - memcpy(bind, stage_state->surf_offset, - prog_data->binding_table.size_bytes); - const uint64_t ksp = brw->cs.base.prog_offset + - brw_cs_prog_data_prog_offset(cs_prog_data, - dispatch.simd_size); - const struct GENX(INTERFACE_DESCRIPTOR_DATA) idd = { - .KernelStartPointer = ksp, - .SamplerStatePointer = stage_state->sampler_offset, - /* Wa_1606682166 */ - .SamplerCount = GFX_VER == 11 ? 
0 : - DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4), - .BindingTablePointer = stage_state->bind_bo_offset, - .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs, - .NumberofThreadsinGPGPUThreadGroup = dispatch.threads, - .SharedLocalMemorySize = encode_slm_size(GFX_VER, - prog_data->total_shared), - .BarrierEnable = cs_prog_data->uses_barrier, -#if GFX_VERx10 >= 75 - .CrossThreadConstantDataReadLength = - cs_prog_data->push.cross_thread.regs, -#endif - }; - - GENX(INTERFACE_DESCRIPTOR_DATA_pack)(brw, desc, &idd); - - brw_batch_emit(brw, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) { - load.InterfaceDescriptorTotalLength = - GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); - load.InterfaceDescriptorDataStartAddress = offset; - } -} - -static const struct brw_tracked_state genX(cs_state) = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_CS_PROG_DATA | - BRW_NEW_SAMPLER_STATE_TABLE | - BRW_NEW_SURFACES, - }, - .emit = genX(upload_cs_state) -}; - -#define GPGPU_DISPATCHDIMX 0x2500 -#define GPGPU_DISPATCHDIMY 0x2504 -#define GPGPU_DISPATCHDIMZ 0x2508 - -#define MI_PREDICATE_SRC0 0x2400 -#define MI_PREDICATE_SRC1 0x2408 - -static void -prepare_indirect_gpgpu_walker(struct brw_context *brw) -{ - GLintptr indirect_offset = brw->compute.num_work_groups_offset; - struct brw_bo *bo = brw->compute.num_work_groups_bo; - - emit_lrm(brw, GPGPU_DISPATCHDIMX, ro_bo(bo, indirect_offset + 0)); - emit_lrm(brw, GPGPU_DISPATCHDIMY, ro_bo(bo, indirect_offset + 4)); - emit_lrm(brw, GPGPU_DISPATCHDIMZ, ro_bo(bo, indirect_offset + 8)); - -#if GFX_VER <= 7 - /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */ - emit_lri(brw, MI_PREDICATE_SRC0 + 4, 0); - emit_lri(brw, MI_PREDICATE_SRC1 , 0); - emit_lri(brw, MI_PREDICATE_SRC1 + 4, 0); - - /* Load compute_dispatch_indirect_x_size into SRC0 */ - emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 0)); - - /* predicate = (compute_dispatch_indirect_x_size == 0); */ - brw_batch_emit(brw, GENX(MI_PREDICATE), mip) { - mip.LoadOperation = LOAD_LOAD; - mip.CombineOperation = COMBINE_SET; - mip.CompareOperation = COMPARE_SRCS_EQUAL; - } - - /* Load compute_dispatch_indirect_y_size into SRC0 */ - emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 4)); - - /* predicate |= (compute_dispatch_indirect_y_size == 0); */ - brw_batch_emit(brw, GENX(MI_PREDICATE), mip) { - mip.LoadOperation = LOAD_LOAD; - mip.CombineOperation = COMBINE_OR; - mip.CompareOperation = COMPARE_SRCS_EQUAL; - } - - /* Load compute_dispatch_indirect_z_size into SRC0 */ - emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 8)); - - /* predicate |= (compute_dispatch_indirect_z_size == 0); */ - brw_batch_emit(brw, GENX(MI_PREDICATE), mip) { - mip.LoadOperation = LOAD_LOAD; - mip.CombineOperation = COMBINE_OR; - mip.CompareOperation = COMPARE_SRCS_EQUAL; - } - - /* predicate = !predicate; */ -#define COMPARE_FALSE 1 - brw_batch_emit(brw, GENX(MI_PREDICATE), mip) { - mip.LoadOperation = LOAD_LOADINV; - mip.CombineOperation = COMBINE_OR; - mip.CompareOperation = COMPARE_FALSE; - } -#endif -} - -static void -genX(emit_gpgpu_walker)(struct brw_context *brw) -{ - const GLuint *num_groups = brw->compute.num_work_groups; - - bool indirect = brw->compute.num_work_groups_bo != NULL; - if (indirect) - prepare_indirect_gpgpu_walker(brw); - - const struct brw_cs_dispatch_info dispatch = - brw_cs_get_dispatch_info(&brw->screen->devinfo, - brw_cs_prog_data(brw->cs.base.prog_data), - brw->compute.group_size); 
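-
-   /* A worked example of the encoding below, with illustrative numbers
-    * rather than anything from the PRMs: a local group of 100 invocations
-    * compiled at dispatch.simd_size == 16 gives dispatch.threads ==
-    * DIV_ROUND_UP(100, 16) == 7, so ThreadWidthCounterMaximum becomes 6,
-    * SIMDSize becomes 16 / 16 == 1, and dispatch.right_mask enables only
-    * the four lanes that are live in the final, partially filled thread.
-    */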
- - brw_batch_emit(brw, GENX(GPGPU_WALKER), ggw) { - ggw.IndirectParameterEnable = indirect; - ggw.PredicateEnable = GFX_VER <= 7 && indirect; - ggw.SIMDSize = dispatch.simd_size / 16; - ggw.ThreadDepthCounterMaximum = 0; - ggw.ThreadHeightCounterMaximum = 0; - ggw.ThreadWidthCounterMaximum = dispatch.threads - 1; - ggw.ThreadGroupIDXDimension = num_groups[0]; - ggw.ThreadGroupIDYDimension = num_groups[1]; - ggw.ThreadGroupIDZDimension = num_groups[2]; - ggw.RightExecutionMask = dispatch.right_mask; - ggw.BottomExecutionMask = 0xffffffff; - } - - brw_batch_emit(brw, GENX(MEDIA_STATE_FLUSH), msf); -} - -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 8 -static void -genX(upload_raster)(struct brw_context *brw) -{ - const struct gl_context *ctx = &brw->ctx; - - /* _NEW_BUFFERS */ - const bool flip_y = ctx->DrawBuffer->FlipY; - - /* _NEW_POLYGON */ - const struct gl_polygon_attrib *polygon = &ctx->Polygon; - - /* _NEW_POINT */ - const struct gl_point_attrib *point = &ctx->Point; - - brw_batch_emit(brw, GENX(3DSTATE_RASTER), raster) { - if (brw->polygon_front_bit != flip_y) - raster.FrontWinding = CounterClockwise; - - if (polygon->CullFlag) { - switch (polygon->CullFaceMode) { - case GL_FRONT: - raster.CullMode = CULLMODE_FRONT; - break; - case GL_BACK: - raster.CullMode = CULLMODE_BACK; - break; - case GL_FRONT_AND_BACK: - raster.CullMode = CULLMODE_BOTH; - break; - default: - unreachable("not reached"); - } - } else { - raster.CullMode = CULLMODE_NONE; - } - - raster.SmoothPointEnable = point->SmoothFlag; - - raster.DXMultisampleRasterizationEnable = - _mesa_is_multisample_enabled(ctx); - - raster.GlobalDepthOffsetEnableSolid = polygon->OffsetFill; - raster.GlobalDepthOffsetEnableWireframe = polygon->OffsetLine; - raster.GlobalDepthOffsetEnablePoint = polygon->OffsetPoint; - - switch (polygon->FrontMode) { - case GL_FILL: - raster.FrontFaceFillMode = FILL_MODE_SOLID; - break; - case GL_LINE: - raster.FrontFaceFillMode = FILL_MODE_WIREFRAME; - break; - case GL_POINT: - raster.FrontFaceFillMode = FILL_MODE_POINT; - break; - default: - unreachable("not reached"); - } - - switch (polygon->BackMode) { - case GL_FILL: - raster.BackFaceFillMode = FILL_MODE_SOLID; - break; - case GL_LINE: - raster.BackFaceFillMode = FILL_MODE_WIREFRAME; - break; - case GL_POINT: - raster.BackFaceFillMode = FILL_MODE_POINT; - break; - default: - unreachable("not reached"); - } - - /* _NEW_LINE */ - raster.AntialiasingEnable = ctx->Line.SmoothFlag; - -#if GFX_VER == 10 - /* _NEW_BUFFERS - * Antialiasing Enable bit MUST not be set when NUM_MULTISAMPLES > 1. 
-       */
-      const bool multisampled_fbo =
-         _mesa_geometric_samples(ctx->DrawBuffer) > 1;
-      if (multisampled_fbo)
-         raster.AntialiasingEnable = false;
-#endif
-
-      /* _NEW_SCISSOR */
-      raster.ScissorRectangleEnable = ctx->Scissor.EnableFlags;
-
-      /* _NEW_TRANSFORM */
-#if GFX_VER < 9
-      if (!(ctx->Transform.DepthClampNear &&
-            ctx->Transform.DepthClampFar))
-         raster.ViewportZClipTestEnable = true;
-#endif
-
-#if GFX_VER >= 9
-      if (!ctx->Transform.DepthClampNear)
-         raster.ViewportZNearClipTestEnable = true;
-
-      if (!ctx->Transform.DepthClampFar)
-         raster.ViewportZFarClipTestEnable = true;
-#endif
-
-      /* BRW_NEW_CONSERVATIVE_RASTERIZATION */
-#if GFX_VER >= 9
-      raster.ConservativeRasterizationEnable =
-         ctx->IntelConservativeRasterization;
-#endif
-
-      raster.GlobalDepthOffsetClamp = polygon->OffsetClamp;
-      raster.GlobalDepthOffsetScale = polygon->OffsetFactor;
-
-      raster.GlobalDepthOffsetConstant = polygon->OffsetUnits * 2;
-   }
-}
-
-static const struct brw_tracked_state genX(raster_state) = {
-   .dirty = {
-      .mesa = _NEW_BUFFERS |
-              _NEW_LINE |
-              _NEW_MULTISAMPLE |
-              _NEW_POINT |
-              _NEW_POLYGON |
-              _NEW_SCISSOR |
-              _NEW_TRANSFORM,
-      .brw = BRW_NEW_BLORP |
-             BRW_NEW_CONTEXT |
-             BRW_NEW_CONSERVATIVE_RASTERIZATION,
-   },
-   .emit = genX(upload_raster),
-};
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-#if GFX_VER >= 8
-static void
-genX(upload_ps_extra)(struct brw_context *brw)
-{
-   UNUSED struct gl_context *ctx = &brw->ctx;
-
-   const struct brw_wm_prog_data *prog_data =
-      brw_wm_prog_data(brw->wm.base.prog_data);
-
-   brw_batch_emit(brw, GENX(3DSTATE_PS_EXTRA), psx) {
-      psx.PixelShaderValid = true;
-      psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
-      psx.PixelShaderKillsPixel = prog_data->uses_kill;
-      psx.AttributeEnable = prog_data->num_varying_inputs != 0;
-      psx.PixelShaderUsesSourceDepth = prog_data->uses_src_depth;
-      psx.PixelShaderUsesSourceW = prog_data->uses_src_w;
-      psx.PixelShaderIsPerSample = prog_data->persample_dispatch;
-
-      /* _NEW_MULTISAMPLE | BRW_NEW_CONSERVATIVE_RASTERIZATION */
-      if (prog_data->uses_sample_mask) {
-#if GFX_VER >= 9
-         if (prog_data->post_depth_coverage)
-            psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
-         else if (prog_data->inner_coverage &&
-                  ctx->IntelConservativeRasterization)
-            psx.InputCoverageMaskState = ICMS_INNER_CONSERVATIVE;
-         else
-            psx.InputCoverageMaskState = ICMS_NORMAL;
-#else
-         psx.PixelShaderUsesInputCoverageMask = true;
-#endif
-      }
-
-      psx.oMaskPresenttoRenderTarget = prog_data->uses_omask;
-#if GFX_VER >= 9
-      psx.PixelShaderPullsBary = prog_data->pulls_bary;
-      psx.PixelShaderComputesStencil = prog_data->computed_stencil;
-#endif
-
-      /* The stricter cross-primitive coherency guarantees that the hardware
-       * gives us with the "Accesses UAV" bit set for at least one shader stage
-       * and the "UAV coherency required" bit set on the 3DPRIMITIVE command
-       * are redundant within the current image, atomic counter and SSBO GL
-       * APIs, which all have very loose ordering and coherency requirements
-       * and generally rely on the application to insert explicit barriers when
-       * a shader invocation is expected to see the memory writes performed by
-       * the invocations of some previous primitive. Regardless of the value
-       * of "UAV coherency required", the "Accesses UAV" bits will implicitly
-       * cause a DC flush, useless in most cases, when the lowermost stage with
-       * the bit set finishes execution.
- * - * It would be nice to disable it, but in some cases we can't because on - * Gfx8+ it also has an influence on rasterization via the PS UAV-only - * signal (which could be set independently from the coherency mechanism - * in the 3DSTATE_WM command on Gfx7), and because in some cases it will - * determine whether the hardware skips execution of the fragment shader - * or not via the ThreadDispatchEnable signal. However if we know that - * GFX8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and - * GFX8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any - * difference so we may just disable it here. - * - * Gfx8 hardware tries to compute ThreadDispatchEnable for us but doesn't - * take into account KillPixels when no depth or stencil writes are - * enabled. In order for occlusion queries to work correctly with no - * attachments, we need to force-enable here. - * - * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | - * _NEW_COLOR - */ - if ((prog_data->has_side_effects || prog_data->uses_kill) && - !brw_color_buffer_write_enabled(brw)) - psx.PixelShaderHasUAV = true; - } -} - -const struct brw_tracked_state genX(ps_extra) = { - .dirty = { - .mesa = _NEW_BUFFERS | _NEW_COLOR, - .brw = BRW_NEW_BLORP | - BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_CONSERVATIVE_RASTERIZATION, - }, - .emit = genX(upload_ps_extra), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 8 -static void -genX(upload_ps_blend)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - /* _NEW_BUFFERS */ - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0]; - const bool buffer0_is_integer = ctx->DrawBuffer->_IntegerBuffers & 0x1; - - /* _NEW_COLOR */ - struct gl_colorbuffer_attrib *color = &ctx->Color; - - brw_batch_emit(brw, GENX(3DSTATE_PS_BLEND), pb) { - /* BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR */ - pb.HasWriteableRT = brw_color_buffer_write_enabled(brw); - - bool alpha_to_one = false; - - if (!buffer0_is_integer) { - /* _NEW_MULTISAMPLE */ - - if (_mesa_is_multisample_enabled(ctx)) { - pb.AlphaToCoverageEnable = ctx->Multisample.SampleAlphaToCoverage; - alpha_to_one = ctx->Multisample.SampleAlphaToOne; - } - - pb.AlphaTestEnable = color->AlphaEnabled; - } - - /* Used for implementing the following bit of GL_EXT_texture_integer: - * "Per-fragment operations that require floating-point color - * components, including multisample alpha operations, alpha test, - * blending, and dithering, have no effect when the corresponding - * colors are written to an integer color buffer." - * - * The OpenGL specification 3.3 (page 196), section 4.1.3 says: - * "If drawbuffer zero is not NONE and the buffer it references has an - * integer format, the SAMPLE_ALPHA_TO_COVERAGE and SAMPLE_ALPHA_TO_ONE - * operations are skipped." - */ - if (rb && !buffer0_is_integer && (color->BlendEnabled & 1)) { - GLenum eqRGB = color->Blend[0].EquationRGB; - GLenum eqA = color->Blend[0].EquationA; - GLenum srcRGB = color->Blend[0].SrcRGB; - GLenum dstRGB = color->Blend[0].DstRGB; - GLenum srcA = color->Blend[0].SrcA; - GLenum dstA = color->Blend[0].DstA; - - if (eqRGB == GL_MIN || eqRGB == GL_MAX) - srcRGB = dstRGB = GL_ONE; - - if (eqA == GL_MIN || eqA == GL_MAX) - srcA = dstA = GL_ONE; - - /* Due to hardware limitations, the destination may have information - * in an alpha channel even when the format specifies no alpha - * channel. 
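- * (For instance, an RGBX renderbuffer still has physical storage for the
- * X channel, so a GL_DST_ALPHA blend factor could read stale data there
- * instead of the implicit 1.0.)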
In order to avoid getting any incorrect blending due to - * that alpha channel, coerce the blend factors to values that will - * not read the alpha channel, but will instead use the correct - * implicit value for alpha. - */ - if (!_mesa_base_format_has_channel(rb->_BaseFormat, - GL_TEXTURE_ALPHA_TYPE)) { - srcRGB = brw_fix_xRGB_alpha(srcRGB); - srcA = brw_fix_xRGB_alpha(srcA); - dstRGB = brw_fix_xRGB_alpha(dstRGB); - dstA = brw_fix_xRGB_alpha(dstA); - } - - /* Alpha to One doesn't work with Dual Color Blending. Override - * SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO. - */ - if (alpha_to_one && color->_BlendUsesDualSrc & 0x1) { - srcRGB = fix_dual_blend_alpha_to_one(srcRGB); - srcA = fix_dual_blend_alpha_to_one(srcA); - dstRGB = fix_dual_blend_alpha_to_one(dstRGB); - dstA = fix_dual_blend_alpha_to_one(dstA); - } - - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - - /* The Dual Source Blending documentation says: - * - * "If SRC1 is included in a src/dst blend factor and - * a DualSource RT Write message is not used, results - * are UNDEFINED. (This reflects the same restriction in DX APIs, - * where undefined results are produced if “o1” is not written - * by a PS – there are no default values defined). - * If SRC1 is not included in a src/dst blend factor, - * dual source blending must be disabled." - * - * There is no way to gracefully fix this undefined situation - * so we just disable the blending to prevent possible issues. - */ - pb.ColorBufferBlendEnable = - !(color->_BlendUsesDualSrc & 0x1) || wm_prog_data->dual_src_blend; - pb.SourceAlphaBlendFactor = brw_translate_blend_factor(srcA); - pb.DestinationAlphaBlendFactor = brw_translate_blend_factor(dstA); - pb.SourceBlendFactor = brw_translate_blend_factor(srcRGB); - pb.DestinationBlendFactor = brw_translate_blend_factor(dstRGB); - - pb.IndependentAlphaBlendEnable = - srcA != srcRGB || dstA != dstRGB || eqA != eqRGB; - } - } -} - -static const struct brw_tracked_state genX(ps_blend) = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_COLOR | - _NEW_MULTISAMPLE, - .brw = BRW_NEW_BLORP | - BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_FS_PROG_DATA, - }, - .emit = genX(upload_ps_blend) -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 8 -static void -genX(emit_vf_topology)(struct brw_context *brw) -{ - brw_batch_emit(brw, GENX(3DSTATE_VF_TOPOLOGY), vftopo) { - vftopo.PrimitiveTopologyType = brw->primitive; - } -} - -static const struct brw_tracked_state genX(vf_topology) = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BLORP | - BRW_NEW_PRIMITIVE, - }, - .emit = genX(emit_vf_topology), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER >= 7 -static void -genX(emit_mi_report_perf_count)(struct brw_context *brw, - struct brw_bo *bo, - uint32_t offset_in_bytes, - uint32_t report_id) -{ - brw_batch_emit(brw, GENX(MI_REPORT_PERF_COUNT), mi_rpc) { - mi_rpc.MemoryAddress = ggtt_bo(bo, offset_in_bytes); - mi_rpc.ReportID = report_id; - } -} -#endif - -/* ---------------------------------------------------------------------- */ - -/** - * Emit a 3DSTATE_SAMPLER_STATE_POINTERS_{VS,HS,GS,DS,PS} packet. 
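- *
- * Note that only the VS flavor of the packet is named in the emit below;
- * overriding _3DCommandSubOpcode (43..47) is what selects which stage's
- * sampler-state pointer the packet actually updates.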
- */ -static void -genX(emit_sampler_state_pointers_xs)(UNUSED struct brw_context *brw, - UNUSED struct brw_stage_state *stage_state) -{ -#if GFX_VER >= 7 - static const uint16_t packet_headers[] = { - [MESA_SHADER_VERTEX] = 43, - [MESA_SHADER_TESS_CTRL] = 44, - [MESA_SHADER_TESS_EVAL] = 45, - [MESA_SHADER_GEOMETRY] = 46, - [MESA_SHADER_FRAGMENT] = 47, - }; - - /* Ivybridge requires a workaround flush before VS packets. */ - if (GFX_VERx10 == 70 && - stage_state->stage == MESA_SHADER_VERTEX) { - gfx7_emit_vs_workaround_flush(brw); - } - - brw_batch_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) { - ptr._3DCommandSubOpcode = packet_headers[stage_state->stage]; - ptr.PointertoVSSamplerState = stage_state->sampler_offset; - } -#endif -} - -UNUSED static bool -has_component(mesa_format format, int i) -{ - if (_mesa_is_format_color_format(format)) - return _mesa_format_has_color_component(format, i); - - /* depth and stencil have only one component */ - return i == 0; -} - -/** - * Upload SAMPLER_BORDER_COLOR_STATE. - */ -static void -genX(upload_default_color)(struct brw_context *brw, - const struct gl_sampler_object *sampler, - UNUSED mesa_format format, - GLenum base_format, - bool is_integer_format, bool is_stencil_sampling, - uint32_t *sdc_offset) -{ - union gl_color_union color; - - switch (base_format) { - case GL_DEPTH_COMPONENT: - /* GL specs that border color for depth textures is taken from the - * R channel, while the hardware uses A. Spam R into all the - * channels for safety. - */ - color.ui[0] = sampler->Attrib.state.border_color.ui[0]; - color.ui[1] = sampler->Attrib.state.border_color.ui[0]; - color.ui[2] = sampler->Attrib.state.border_color.ui[0]; - color.ui[3] = sampler->Attrib.state.border_color.ui[0]; - break; - case GL_ALPHA: - color.ui[0] = 0u; - color.ui[1] = 0u; - color.ui[2] = 0u; - color.ui[3] = sampler->Attrib.state.border_color.ui[3]; - break; - case GL_INTENSITY: - color.ui[0] = sampler->Attrib.state.border_color.ui[0]; - color.ui[1] = sampler->Attrib.state.border_color.ui[0]; - color.ui[2] = sampler->Attrib.state.border_color.ui[0]; - color.ui[3] = sampler->Attrib.state.border_color.ui[0]; - break; - case GL_LUMINANCE: - color.ui[0] = sampler->Attrib.state.border_color.ui[0]; - color.ui[1] = sampler->Attrib.state.border_color.ui[0]; - color.ui[2] = sampler->Attrib.state.border_color.ui[0]; - color.ui[3] = float_as_int(1.0); - break; - case GL_LUMINANCE_ALPHA: - color.ui[0] = sampler->Attrib.state.border_color.ui[0]; - color.ui[1] = sampler->Attrib.state.border_color.ui[0]; - color.ui[2] = sampler->Attrib.state.border_color.ui[0]; - color.ui[3] = sampler->Attrib.state.border_color.ui[3]; - break; - default: - color.ui[0] = sampler->Attrib.state.border_color.ui[0]; - color.ui[1] = sampler->Attrib.state.border_color.ui[1]; - color.ui[2] = sampler->Attrib.state.border_color.ui[2]; - color.ui[3] = sampler->Attrib.state.border_color.ui[3]; - break; - } - - /* In some cases we use an RGBA surface format for GL RGB textures, - * where we've initialized the A channel to 1.0. We also have to set - * the border color alpha to 1.0 in that case. 
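- *
- * (For example, a GL_RGB8 texture stored as RGBA8: a sample that lands on
- * the border must still return alpha == 1.0, so the alpha channel of the
- * border color is forced to 1.0 below instead of taking the app's value.)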
- */ - if (base_format == GL_RGB) - color.ui[3] = float_as_int(1.0); - - int alignment = 32; - if (GFX_VER >= 8) { - alignment = 64; - } else if (GFX_VERx10 == 75 && (is_integer_format || is_stencil_sampling)) { - alignment = 512; - } - - uint32_t *sdc = brw_state_batch( - brw, GENX(SAMPLER_BORDER_COLOR_STATE_length) * sizeof(uint32_t), - alignment, sdc_offset); - - struct GENX(SAMPLER_BORDER_COLOR_STATE) state = { 0 }; - -#define ASSIGN(dst, src) \ - do { \ - dst = src; \ - } while (0) - -#define ASSIGNu16(dst, src) \ - do { \ - dst = (uint16_t)src; \ - } while (0) - -#define ASSIGNu8(dst, src) \ - do { \ - dst = (uint8_t)src; \ - } while (0) - -#define BORDER_COLOR_ATTR(macro, _color_type, src) \ - macro(state.BorderColor ## _color_type ## Red, src[0]); \ - macro(state.BorderColor ## _color_type ## Green, src[1]); \ - macro(state.BorderColor ## _color_type ## Blue, src[2]); \ - macro(state.BorderColor ## _color_type ## Alpha, src[3]); - -#if GFX_VER >= 8 - /* On Broadwell, the border color is represented as four 32-bit floats, - * integers, or unsigned values, interpreted according to the surface - * format. This matches the sampler->BorderColor union exactly; just - * memcpy the values. - */ - BORDER_COLOR_ATTR(ASSIGN, 32bit, color.ui); -#elif GFX_VERx10 == 75 - if (is_integer_format || is_stencil_sampling) { - bool stencil = format == MESA_FORMAT_S_UINT8 || is_stencil_sampling; - const int bits_per_channel = - _mesa_get_format_bits(format, stencil ? GL_STENCIL_BITS : GL_RED_BITS); - - /* From the Haswell PRM, "Command Reference: Structures", Page 36: - * "If any color channel is missing from the surface format, - * corresponding border color should be programmed as zero and if - * alpha channel is missing, corresponding Alpha border color should - * be programmed as 1." - */ - unsigned c[4] = { 0, 0, 0, 1 }; - for (int i = 0; i < 4; i++) { - if (has_component(format, i)) - c[i] = color.ui[i]; - } - - switch (bits_per_channel) { - case 8: - /* Copy RGBA in order. */ - BORDER_COLOR_ATTR(ASSIGNu8, 8bit, c); - break; - case 10: - /* R10G10B10A2_UINT is treated like a 16-bit format. */ - case 16: - BORDER_COLOR_ATTR(ASSIGNu16, 16bit, c); - break; - case 32: - if (base_format == GL_RG) { - /* Careful inspection of the tables reveals that for RG32 formats, - * the green channel needs to go where blue normally belongs. - */ - state.BorderColor32bitRed = c[0]; - state.BorderColor32bitBlue = c[1]; - state.BorderColor32bitAlpha = 1; - } else { - /* Copy RGBA in order. 
*/ - BORDER_COLOR_ATTR(ASSIGN, 32bit, c); - } - break; - default: - assert(!"Invalid number of bits per channel in integer format."); - break; - } - } else { - BORDER_COLOR_ATTR(ASSIGN, Float, color.f); - } -#elif GFX_VER == 5 || GFX_VER == 6 - BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_UBYTE, Unorm, color.f); - BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_USHORT, Unorm16, color.f); - BORDER_COLOR_ATTR(UNCLAMPED_FLOAT_TO_SHORT, Snorm16, color.f); - -#define MESA_FLOAT_TO_HALF(dst, src) \ - dst = _mesa_float_to_half(src); - - BORDER_COLOR_ATTR(MESA_FLOAT_TO_HALF, Float16, color.f); - -#undef MESA_FLOAT_TO_HALF - - state.BorderColorSnorm8Red = state.BorderColorSnorm16Red >> 8; - state.BorderColorSnorm8Green = state.BorderColorSnorm16Green >> 8; - state.BorderColorSnorm8Blue = state.BorderColorSnorm16Blue >> 8; - state.BorderColorSnorm8Alpha = state.BorderColorSnorm16Alpha >> 8; - - BORDER_COLOR_ATTR(ASSIGN, Float, color.f); -#elif GFX_VER == 4 - BORDER_COLOR_ATTR(ASSIGN, , color.f); -#else - BORDER_COLOR_ATTR(ASSIGN, Float, color.f); -#endif - -#undef ASSIGN -#undef BORDER_COLOR_ATTR - - GENX(SAMPLER_BORDER_COLOR_STATE_pack)(brw, sdc, &state); -} - -static uint32_t -translate_wrap_mode(GLenum wrap, UNUSED bool using_nearest) -{ - switch (wrap) { - case GL_REPEAT: - return TCM_WRAP; - case GL_CLAMP: -#if GFX_VER >= 8 - /* GL_CLAMP is the weird mode where coordinates are clamped to - * [0.0, 1.0], so linear filtering of coordinates outside of - * [0.0, 1.0] give you half edge texel value and half border - * color. - * - * Gfx8+ supports this natively. - */ - return TCM_HALF_BORDER; -#else - /* On Gfx4-7.5, we clamp the coordinates in the fragment shader - * and set clamp_border here, which gets the result desired. - * We just use clamp(_to_edge) for nearest, because for nearest - * clamping to 1.0 gives border color instead of the desired - * edge texels. - */ - if (using_nearest) - return TCM_CLAMP; - else - return TCM_CLAMP_BORDER; -#endif - case GL_CLAMP_TO_EDGE: - return TCM_CLAMP; - case GL_CLAMP_TO_BORDER: - return TCM_CLAMP_BORDER; - case GL_MIRRORED_REPEAT: - return TCM_MIRROR; - case GL_MIRROR_CLAMP_TO_EDGE: - return TCM_MIRROR_ONCE; - default: - return TCM_WRAP; - } -} - -/** - * Return true if the given wrap mode requires the border color to exist. - */ -static bool -wrap_mode_needs_border_color(unsigned wrap_mode) -{ -#if GFX_VER >= 8 - return wrap_mode == TCM_CLAMP_BORDER || - wrap_mode == TCM_HALF_BORDER; -#else - return wrap_mode == TCM_CLAMP_BORDER; -#endif -} - -/** - * Sets the sampler state for a single unit based off of the sampler key - * entry. - */ -static void -genX(update_sampler_state)(struct brw_context *brw, - GLenum target, bool tex_cube_map_seamless, - GLfloat tex_unit_lod_bias, - mesa_format format, GLenum base_format, - const struct gl_texture_object *texObj, - const struct gl_sampler_object *sampler, - uint32_t *sampler_state) -{ - struct GENX(SAMPLER_STATE) samp_st = { 0 }; - - /* Select min and mip filters. 
    */
-   switch (sampler->Attrib.MinFilter) {
-   case GL_NEAREST:
-      samp_st.MinModeFilter = MAPFILTER_NEAREST;
-      samp_st.MipModeFilter = MIPFILTER_NONE;
-      break;
-   case GL_LINEAR:
-      samp_st.MinModeFilter = MAPFILTER_LINEAR;
-      samp_st.MipModeFilter = MIPFILTER_NONE;
-      break;
-   case GL_NEAREST_MIPMAP_NEAREST:
-      samp_st.MinModeFilter = MAPFILTER_NEAREST;
-      samp_st.MipModeFilter = MIPFILTER_NEAREST;
-      break;
-   case GL_LINEAR_MIPMAP_NEAREST:
-      samp_st.MinModeFilter = MAPFILTER_LINEAR;
-      samp_st.MipModeFilter = MIPFILTER_NEAREST;
-      break;
-   case GL_NEAREST_MIPMAP_LINEAR:
-      samp_st.MinModeFilter = MAPFILTER_NEAREST;
-      samp_st.MipModeFilter = MIPFILTER_LINEAR;
-      break;
-   case GL_LINEAR_MIPMAP_LINEAR:
-      samp_st.MinModeFilter = MAPFILTER_LINEAR;
-      samp_st.MipModeFilter = MIPFILTER_LINEAR;
-      break;
-   default:
-      unreachable("not reached");
-   }
-
-   /* Select mag filter. */
-   samp_st.MagModeFilter = sampler->Attrib.MagFilter == GL_LINEAR ?
-      MAPFILTER_LINEAR : MAPFILTER_NEAREST;
-
-   /* Enable anisotropic filtering if desired. */
-   samp_st.MaximumAnisotropy = RATIO21;
-
-   if (sampler->Attrib.MaxAnisotropy > 1.0f) {
-      if (samp_st.MinModeFilter == MAPFILTER_LINEAR)
-         samp_st.MinModeFilter = MAPFILTER_ANISOTROPIC;
-      if (samp_st.MagModeFilter == MAPFILTER_LINEAR)
-         samp_st.MagModeFilter = MAPFILTER_ANISOTROPIC;
-
-      if (sampler->Attrib.MaxAnisotropy > 2.0f) {
-         samp_st.MaximumAnisotropy =
-            MIN2((sampler->Attrib.MaxAnisotropy - 2) / 2, RATIO161);
-      }
-   }
-
-   /* Set address rounding bits if not using nearest filtering. */
-   if (samp_st.MinModeFilter != MAPFILTER_NEAREST) {
-      samp_st.UAddressMinFilterRoundingEnable = true;
-      samp_st.VAddressMinFilterRoundingEnable = true;
-      samp_st.RAddressMinFilterRoundingEnable = true;
-   }
-
-   if (samp_st.MagModeFilter != MAPFILTER_NEAREST) {
-      samp_st.UAddressMagFilterRoundingEnable = true;
-      samp_st.VAddressMagFilterRoundingEnable = true;
-      samp_st.RAddressMagFilterRoundingEnable = true;
-   }
-
-   bool either_nearest =
-      sampler->Attrib.MinFilter == GL_NEAREST ||
-      sampler->Attrib.MagFilter == GL_NEAREST;
-   unsigned wrap_s = translate_wrap_mode(sampler->Attrib.WrapS, either_nearest);
-   unsigned wrap_t = translate_wrap_mode(sampler->Attrib.WrapT, either_nearest);
-   unsigned wrap_r = translate_wrap_mode(sampler->Attrib.WrapR, either_nearest);
-
-   if (target == GL_TEXTURE_CUBE_MAP ||
-       target == GL_TEXTURE_CUBE_MAP_ARRAY) {
-      /* Cube maps must use the same wrap mode for all three coordinate
-       * dimensions. Prior to Haswell, only CUBE and CLAMP are valid.
-       *
-       * Ivybridge and Baytrail seem to have problems with CUBE mode and
-       * integer formats. Fall back to CLAMP for now.
-       */
-      if ((tex_cube_map_seamless || sampler->Attrib.CubeMapSeamless) &&
-          !(GFX_VERx10 == 70 && texObj->_IsIntegerFormat)) {
-         wrap_s = TCM_CUBE;
-         wrap_t = TCM_CUBE;
-         wrap_r = TCM_CUBE;
-      } else {
-         wrap_s = TCM_CLAMP;
-         wrap_t = TCM_CLAMP;
-         wrap_r = TCM_CLAMP;
-      }
-   } else if (target == GL_TEXTURE_1D) {
-      /* There's a bug in 1D texture sampling - it actually pays
-       * attention to the wrap_t value, though it should not.
-       * Override the wrap_t value here to GL_REPEAT to keep
-       * any nonexistent border pixels from floating in.
-       */
-      wrap_t = TCM_WRAP;
-   }
-
-   samp_st.TCXAddressControlMode = wrap_s;
-   samp_st.TCYAddressControlMode = wrap_t;
-   samp_st.TCZAddressControlMode = wrap_r;
-
-   samp_st.ShadowFunction =
-      sampler->Attrib.CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB ?
-      brw_translate_shadow_compare_func(sampler->Attrib.CompareFunc) : 0;
-
-#if GFX_VER >= 7
-   /* Select the anisotropic filtering algorithm.
*/ - samp_st.AnisotropicAlgorithm = - samp_st.MinModeFilter == MAPFILTER_ANISOTROPIC ? - EWAApproximation : LEGACY; -#endif - -#if GFX_VER >= 6 - samp_st.NonnormalizedCoordinateEnable = target == GL_TEXTURE_RECTANGLE; -#endif - - const float hw_max_lod = GFX_VER >= 7 ? 14 : 13; - samp_st.MinLOD = CLAMP(sampler->Attrib.MinLod, 0, hw_max_lod); - samp_st.MaxLOD = CLAMP(sampler->Attrib.MaxLod, 0, hw_max_lod); - samp_st.TextureLODBias = - CLAMP(tex_unit_lod_bias + sampler->Attrib.LodBias, -16, 15); - -#if GFX_VER == 6 - samp_st.BaseMipLevel = - CLAMP(texObj->Attrib.MinLevel + texObj->Attrib.BaseLevel, 0, hw_max_lod); - samp_st.MinandMagStateNotEqual = - samp_st.MinModeFilter != samp_st.MagModeFilter; -#endif - - /* Upload the border color if necessary. If not, just point it at - * offset 0 (the start of the batch) - the color should be ignored, - * but that address won't fault in case something reads it anyway. - */ - uint32_t border_color_offset = 0; - if (wrap_mode_needs_border_color(wrap_s) || - wrap_mode_needs_border_color(wrap_t) || - wrap_mode_needs_border_color(wrap_r)) { - genX(upload_default_color)(brw, sampler, format, base_format, - texObj->_IsIntegerFormat, - texObj->StencilSampling, - &border_color_offset); - } -#if GFX_VER < 6 - samp_st.BorderColorPointer = - ro_bo(brw->batch.state.bo, border_color_offset); -#else - samp_st.BorderColorPointer = border_color_offset; -#endif - -#if GFX_VER >= 8 - samp_st.LODPreClampMode = CLAMP_MODE_OGL; -#else - samp_st.LODPreClampEnable = true; -#endif - - GENX(SAMPLER_STATE_pack)(brw, sampler_state, &samp_st); -} - -static void -update_sampler_state(struct brw_context *brw, - int unit, - uint32_t *sampler_state) -{ - struct gl_context *ctx = &brw->ctx; - const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - const struct gl_texture_object *texObj = texUnit->_Current; - const struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); - - /* These don't use samplers at all. */ - if (texObj->Target == GL_TEXTURE_BUFFER) - return; - - struct gl_texture_image *firstImage = texObj->Image[0][texObj->Attrib.BaseLevel]; - genX(update_sampler_state)(brw, texObj->Target, - ctx->Texture.CubeMapSeamless, - texUnit->LodBias, - firstImage->TexFormat, firstImage->_BaseFormat, - texObj, sampler, - sampler_state); -} - -static void -genX(upload_sampler_state_table)(struct brw_context *brw, - struct gl_program *prog, - struct brw_stage_state *stage_state) -{ - struct gl_context *ctx = &brw->ctx; - uint32_t sampler_count = stage_state->sampler_count; - - GLbitfield SamplersUsed = prog->SamplersUsed; - - if (sampler_count == 0) - return; - - /* SAMPLER_STATE is 4 DWords on all platforms. */ - const int dwords = GENX(SAMPLER_STATE_length); - const int size_in_bytes = dwords * sizeof(uint32_t); - - uint32_t *sampler_state = brw_state_batch(brw, - sampler_count * size_in_bytes, - 32, &stage_state->sampler_offset); - /* memset(sampler_state, 0, sampler_count * size_in_bytes); */ - - for (unsigned s = 0; s < sampler_count; s++) { - if (SamplersUsed & (1 << s)) { - const unsigned unit = prog->SamplerUnits[s]; - if (ctx->Texture.Unit[unit]._Current) { - update_sampler_state(brw, unit, sampler_state); - } - } - - sampler_state += dwords; - } - - if (GFX_VER >= 7 && stage_state->stage != MESA_SHADER_COMPUTE) { - /* Emit a 3DSTATE_SAMPLER_STATE_POINTERS_XS packet. */ - genX(emit_sampler_state_pointers_xs)(brw, stage_state); - } else { - /* Flag that the sampler state table pointer has changed; later atoms - * will handle it. 
- */ - brw->ctx.NewDriverState |= BRW_NEW_SAMPLER_STATE_TABLE; - } -} - -static void -genX(upload_fs_samplers)(struct brw_context *brw) -{ - /* BRW_NEW_FRAGMENT_PROGRAM */ - struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT]; - genX(upload_sampler_state_table)(brw, fs, &brw->wm.base); -} - -static const struct brw_tracked_state genX(fs_samplers) = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_FRAGMENT_PROGRAM, - }, - .emit = genX(upload_fs_samplers), -}; - -static void -genX(upload_vs_samplers)(struct brw_context *brw) -{ - /* BRW_NEW_VERTEX_PROGRAM */ - struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX]; - genX(upload_sampler_state_table)(brw, vs, &brw->vs.base); -} - -static const struct brw_tracked_state genX(vs_samplers) = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VERTEX_PROGRAM, - }, - .emit = genX(upload_vs_samplers), -}; - -#if GFX_VER >= 6 -static void -genX(upload_gs_samplers)(struct brw_context *brw) -{ - /* BRW_NEW_GEOMETRY_PROGRAM */ - struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY]; - if (!gs) - return; - - genX(upload_sampler_state_table)(brw, gs, &brw->gs.base); -} - - -static const struct brw_tracked_state genX(gs_samplers) = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_GEOMETRY_PROGRAM, - }, - .emit = genX(upload_gs_samplers), -}; -#endif - -#if GFX_VER >= 7 -static void -genX(upload_tcs_samplers)(struct brw_context *brw) -{ - /* BRW_NEW_TESS_PROGRAMS */ - struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL]; - if (!tcs) - return; - - genX(upload_sampler_state_table)(brw, tcs, &brw->tcs.base); -} - -static const struct brw_tracked_state genX(tcs_samplers) = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_TESS_PROGRAMS, - }, - .emit = genX(upload_tcs_samplers), -}; -#endif - -#if GFX_VER >= 7 -static void -genX(upload_tes_samplers)(struct brw_context *brw) -{ - /* BRW_NEW_TESS_PROGRAMS */ - struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL]; - if (!tes) - return; - - genX(upload_sampler_state_table)(brw, tes, &brw->tes.base); -} - -static const struct brw_tracked_state genX(tes_samplers) = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_TESS_PROGRAMS, - }, - .emit = genX(upload_tes_samplers), -}; -#endif - -#if GFX_VER >= 7 -static void -genX(upload_cs_samplers)(struct brw_context *brw) -{ - /* BRW_NEW_COMPUTE_PROGRAM */ - struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE]; - if (!cs) - return; - - genX(upload_sampler_state_table)(brw, cs, &brw->cs.base); -} - -const struct brw_tracked_state genX(cs_samplers) = { - .dirty = { - .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_COMPUTE_PROGRAM, - }, - .emit = genX(upload_cs_samplers), -}; -#endif - -/* ---------------------------------------------------------------------- */ - -#if GFX_VER <= 5 - -static void genX(upload_blend_constant_color)(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - - brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_COLOR), blend_cc) { - blend_cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0]; - blend_cc.BlendConstantColorGreen = ctx->Color.BlendColorUnclamped[1]; - blend_cc.BlendConstantColorBlue = ctx->Color.BlendColorUnclamped[2]; - blend_cc.BlendConstantColorAlpha = ctx->Color.BlendColorUnclamped[3]; - } -} - -static const struct brw_tracked_state 
genX(blend_constant_color) = { - .dirty = { - .mesa = _NEW_COLOR, - .brw = BRW_NEW_CONTEXT | - BRW_NEW_BLORP, - }, - .emit = genX(upload_blend_constant_color) -}; -#endif - -/* ---------------------------------------------------------------------- */ - -void -genX(init_atoms)(struct brw_context *brw) -{ -#if GFX_VER < 6 - static const struct brw_tracked_state *render_atoms[] = - { - &genX(vf_statistics), - - /* Once all the programs are done, we know how large urb entry - * sizes need to be and can decide if we need to change the urb - * layout. - */ - &brw_curbe_offsets, - &brw_recalculate_urb_fence, - - &genX(cc_vp), - &genX(color_calc_state), - - /* Surface state setup. Must come before the VS/WM unit. The binding - * table upload must be last. - */ - &brw_vs_pull_constants, - &brw_wm_pull_constants, - &brw_renderbuffer_surfaces, - &brw_renderbuffer_read_surfaces, - &brw_texture_surfaces, - &brw_vs_binding_table, - &brw_wm_binding_table, - - &genX(fs_samplers), - &genX(vs_samplers), - - /* These set up state for brw_psp_urb_cbs */ - &genX(wm_state), - &genX(sf_clip_viewport), - &genX(sf_state), - &genX(vs_state), /* always required, enabled or not */ - &genX(clip_state), - &genX(gs_state), - - /* Command packets: - */ - &brw_binding_table_pointers, - &genX(blend_constant_color), - - &brw_depthbuffer, - - &genX(polygon_stipple), - &genX(polygon_stipple_offset), - - &genX(line_stipple), - - &brw_psp_urb_cbs, - - &genX(drawing_rect), - &brw_indices, /* must come before brw_vertices */ - &genX(index_buffer), - &genX(vertices), - - &brw_constant_buffer - }; -#elif GFX_VER == 6 - static const struct brw_tracked_state *render_atoms[] = - { - &genX(vf_statistics), - - &genX(sf_clip_viewport), - - /* Command packets: */ - - &genX(cc_vp), - - &gfx6_urb, - &genX(blend_state), /* must do before cc unit */ - &genX(color_calc_state), /* must do before cc unit */ - &genX(depth_stencil_state), /* must do before cc unit */ - - &genX(vs_push_constants), /* Before vs_state */ - &genX(gs_push_constants), /* Before gs_state */ - &genX(wm_push_constants), /* Before wm_state */ - - /* Surface state setup. Must come before the VS/WM unit. The binding - * table upload must be last. 
- */ - &brw_vs_pull_constants, - &brw_vs_ubo_surfaces, - &brw_gs_pull_constants, - &brw_gs_ubo_surfaces, - &brw_wm_pull_constants, - &brw_wm_ubo_surfaces, - &gfx6_renderbuffer_surfaces, - &brw_renderbuffer_read_surfaces, - &brw_texture_surfaces, - &gfx6_sol_surface, - &brw_vs_binding_table, - &gfx6_gs_binding_table, - &brw_wm_binding_table, - - &genX(fs_samplers), - &genX(vs_samplers), - &genX(gs_samplers), - &gfx6_sampler_state, - &genX(multisample_state), - - &genX(vs_state), - &genX(gs_state), - &genX(clip_state), - &genX(sf_state), - &genX(wm_state), - - &genX(scissor_state), - - &gfx6_binding_table_pointers, - - &brw_depthbuffer, - - &genX(polygon_stipple), - &genX(polygon_stipple_offset), - - &genX(line_stipple), - - &genX(drawing_rect), - - &brw_indices, /* must come before brw_vertices */ - &genX(index_buffer), - &genX(vertices), - }; -#elif GFX_VER == 7 - static const struct brw_tracked_state *render_atoms[] = - { - &genX(vf_statistics), - - /* Command packets: */ - - &genX(cc_vp), - &genX(sf_clip_viewport), - - &gfx7_l3_state, - &gfx7_push_constant_space, - &gfx7_urb, -#if GFX_VERx10 == 75 - &genX(cc_and_blend_state), -#else - &genX(blend_state), /* must do before cc unit */ - &genX(color_calc_state), /* must do before cc unit */ -#endif - &genX(depth_stencil_state), /* must do before cc unit */ - - &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */ - &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */ - &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */ - &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */ - &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */ - - &genX(vs_push_constants), /* Before vs_state */ - &genX(tcs_push_constants), - &genX(tes_push_constants), - &genX(gs_push_constants), /* Before gs_state */ - &genX(wm_push_constants), /* Before wm_surfaces and constant_buffer */ - - /* Surface state setup. Must come before the VS/WM unit. The binding - * table upload must be last. 
- */ - &brw_vs_pull_constants, - &brw_vs_ubo_surfaces, - &brw_tcs_pull_constants, - &brw_tcs_ubo_surfaces, - &brw_tes_pull_constants, - &brw_tes_ubo_surfaces, - &brw_gs_pull_constants, - &brw_gs_ubo_surfaces, - &brw_wm_pull_constants, - &brw_wm_ubo_surfaces, - &gfx6_renderbuffer_surfaces, - &brw_renderbuffer_read_surfaces, - &brw_texture_surfaces, - - &genX(push_constant_packets), - - &brw_vs_binding_table, - &brw_tcs_binding_table, - &brw_tes_binding_table, - &brw_gs_binding_table, - &brw_wm_binding_table, - - &genX(fs_samplers), - &genX(vs_samplers), - &genX(tcs_samplers), - &genX(tes_samplers), - &genX(gs_samplers), - &genX(multisample_state), - - &genX(vs_state), - &genX(hs_state), - &genX(te_state), - &genX(ds_state), - &genX(gs_state), - &genX(sol_state), - &genX(clip_state), - &genX(sbe_state), - &genX(sf_state), - &genX(wm_state), - &genX(ps_state), - - &genX(scissor_state), - - &brw_depthbuffer, - - &genX(polygon_stipple), - &genX(polygon_stipple_offset), - - &genX(line_stipple), - - &genX(drawing_rect), - - &brw_indices, /* must come before brw_vertices */ - &genX(index_buffer), - &genX(vertices), - -#if GFX_VERx10 == 75 - &genX(cut_index), -#endif - }; -#elif GFX_VER >= 8 - static const struct brw_tracked_state *render_atoms[] = - { - &genX(vf_statistics), - - &genX(cc_vp), - &genX(sf_clip_viewport), - - &gfx7_l3_state, - &gfx7_push_constant_space, - &gfx7_urb, - &genX(blend_state), - &genX(color_calc_state), - - &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */ - &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */ - &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */ - &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */ - &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */ - - &genX(vs_push_constants), /* Before vs_state */ - &genX(tcs_push_constants), - &genX(tes_push_constants), - &genX(gs_push_constants), /* Before gs_state */ - &genX(wm_push_constants), /* Before wm_surfaces and constant_buffer */ - - /* Surface state setup. Must come before the VS/WM unit. The binding - * table upload must be last. 
- */ - &brw_vs_pull_constants, - &brw_vs_ubo_surfaces, - &brw_tcs_pull_constants, - &brw_tcs_ubo_surfaces, - &brw_tes_pull_constants, - &brw_tes_ubo_surfaces, - &brw_gs_pull_constants, - &brw_gs_ubo_surfaces, - &brw_wm_pull_constants, - &brw_wm_ubo_surfaces, - &gfx6_renderbuffer_surfaces, - &brw_renderbuffer_read_surfaces, - &brw_texture_surfaces, - - &genX(push_constant_packets), - - &brw_vs_binding_table, - &brw_tcs_binding_table, - &brw_tes_binding_table, - &brw_gs_binding_table, - &brw_wm_binding_table, - - &genX(fs_samplers), - &genX(vs_samplers), - &genX(tcs_samplers), - &genX(tes_samplers), - &genX(gs_samplers), - &genX(multisample_state), - - &genX(vs_state), - &genX(hs_state), - &genX(te_state), - &genX(ds_state), - &genX(gs_state), - &genX(sol_state), - &genX(clip_state), - &genX(raster_state), - &genX(sbe_state), - &genX(sf_state), - &genX(ps_blend), - &genX(ps_extra), - &genX(ps_state), - &genX(depth_stencil_state), - &genX(wm_state), - - &genX(scissor_state), - - &brw_depthbuffer, - - &genX(polygon_stipple), - &genX(polygon_stipple_offset), - - &genX(line_stipple), - - &genX(drawing_rect), - - &genX(vf_topology), - - &brw_indices, - &genX(index_buffer), - &genX(vertices), - - &genX(cut_index), - &gfx8_pma_fix, - }; -#endif - - STATIC_ASSERT(ARRAY_SIZE(render_atoms) <= ARRAY_SIZE(brw->render_atoms)); - brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE, - render_atoms, ARRAY_SIZE(render_atoms)); - -#if GFX_VER >= 7 - static const struct brw_tracked_state *compute_atoms[] = - { - &gfx7_l3_state, - &brw_cs_image_surfaces, - &genX(cs_push_constants), - &genX(cs_pull_constants), - &brw_cs_ubo_surfaces, - &brw_cs_texture_surfaces, - &brw_cs_work_groups_surface, - &genX(cs_samplers), - &genX(cs_state), - }; - - STATIC_ASSERT(ARRAY_SIZE(compute_atoms) <= ARRAY_SIZE(brw->compute_atoms)); - brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE, - compute_atoms, ARRAY_SIZE(compute_atoms)); - - brw->vtbl.emit_mi_report_perf_count = genX(emit_mi_report_perf_count); - brw->vtbl.emit_compute_walker = genX(emit_gpgpu_walker); -#endif - - brw->vtbl.emit_state_base_address = genX(emit_state_base_address); - - assert(brw->screen->devinfo.verx10 == GFX_VERx10); -} diff --git a/src/mesa/drivers/dri/i965/gfx4_blorp_exec.h b/src/mesa/drivers/dri/i965/gfx4_blorp_exec.h deleted file mode 100644 index 62a008e..0000000 --- a/src/mesa/drivers/dri/i965/gfx4_blorp_exec.h +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
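The atom arrays above feed a dirty-bit dispatch loop: each tracked-state atom names the _NEW_* (core Mesa) and BRW_NEW_* (driver) bits it depends on, and its emit hook runs only when one of those bits is set. A toy model of that scheme, assuming single-bit flags (not the driver's actual types or flag values):

    #include <stdint.h>
    #include <stdio.h>

    struct tracked_state {
       uint64_t mesa_bits;   /* _NEW_* flags this atom watches */
       uint64_t brw_bits;    /* BRW_NEW_* flags this atom watches */
       void (*emit)(void);   /* re-emits the corresponding hardware state */
    };

    static void emit_samplers(void) { puts("re-emit sampler state"); }

    static const struct tracked_state atoms[] = {
       { 1u << 0 /* "texture" */, 1u << 1 /* "new batch" */, emit_samplers },
    };

    static void run_atoms(uint64_t mesa_dirty, uint64_t brw_dirty)
    {
       for (unsigned i = 0; i < sizeof(atoms) / sizeof(atoms[0]); i++)
          if ((atoms[i].mesa_bits & mesa_dirty) || (atoms[i].brw_bits & brw_dirty))
             atoms[i].emit();
    }

    int main(void)
    {
       run_atoms(1u << 0, 0);   /* texture state changed -> samplers re-emitted */
       return 0;
    }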
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -static inline struct blorp_address -dynamic_state_address(struct blorp_batch *batch, uint32_t offset) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - return (struct blorp_address) { - .buffer = brw->batch.state.bo, - .offset = offset, - }; -} - -static inline struct blorp_address -instruction_state_address(struct blorp_batch *batch, uint32_t offset) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - return (struct blorp_address) { - .buffer = brw->cache.bo, - .offset = offset, - }; -} - -static struct blorp_address -blorp_emit_vs_state(struct blorp_batch *batch) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - uint32_t offset; - blorp_emit_dynamic(batch, GENX(VS_STATE), vs, 64, &offset) { - vs.Enable = false; - vs.URBEntryAllocationSize = brw->urb.vsize - 1; -#if GFX_VER == 5 - vs.NumberofURBEntries = brw->urb.nr_vs_entries >> 2; -#else - vs.NumberofURBEntries = brw->urb.nr_vs_entries; -#endif - } - - return dynamic_state_address(batch, offset); -} - -static struct blorp_address -blorp_emit_sf_state(struct blorp_batch *batch, - const struct blorp_params *params) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - const struct brw_sf_prog_data *prog_data = params->sf_prog_data; - - uint32_t offset; - blorp_emit_dynamic(batch, GENX(SF_STATE), sf, 64, &offset) { -#if GFX_VER == 4 - sf.KernelStartPointer = - instruction_state_address(batch, params->sf_prog_kernel); -#else - sf.KernelStartPointer = params->sf_prog_kernel; -#endif - sf.GRFRegisterCount = DIV_ROUND_UP(prog_data->total_grf, 16) - 1; - sf.VertexURBEntryReadLength = prog_data->urb_read_length; - sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; - sf.DispatchGRFStartRegisterForURBData = 3; - - sf.URBEntryAllocationSize = brw->urb.sfsize - 1; - sf.NumberofURBEntries = brw->urb.nr_sf_entries; - -#if GFX_VER == 5 - sf.MaximumNumberofThreads = MIN2(48, brw->urb.nr_sf_entries) - 1; -#else - sf.MaximumNumberofThreads = MIN2(24, brw->urb.nr_sf_entries) - 1; -#endif - - sf.ViewportTransformEnable = false; - - sf.CullMode = CULLMODE_NONE; - } - - return dynamic_state_address(batch, offset); -} - -static struct blorp_address -blorp_emit_wm_state(struct blorp_batch *batch, - const struct blorp_params *params) -{ - const struct brw_wm_prog_data *prog_data = params->wm_prog_data; - - uint32_t offset; - blorp_emit_dynamic(batch, GENX(WM_STATE), wm, 64, &offset) { - if (params->src.enabled) { - /* Iron Lake can't do sampler prefetch */ - wm.SamplerCount = (GFX_VER != 5); - wm.BindingTableEntryCount = 2; - uint32_t sampler = blorp_emit_sampler_state(batch); - wm.SamplerStatePointer = dynamic_state_address(batch, sampler); - } - - if (prog_data) { - wm.DispatchGRFStartRegisterForConstantSetupData0 = - prog_data->base.dispatch_grf_start_reg; - wm.SetupURBEntryReadLength = prog_data->num_varying_inputs * 2; - wm.SetupURBEntryReadOffset = 0; - - wm.DepthCoefficientURBReadOffset = 1; - wm.PixelShaderKillsPixel = prog_data->uses_kill; - wm.ThreadDispatchEnable = true; - wm.EarlyDepthTestEnable = true; - - 
wm._8PixelDispatchEnable = prog_data->dispatch_8; - wm._16PixelDispatchEnable = prog_data->dispatch_16; - wm._32PixelDispatchEnable = prog_data->dispatch_32; - -#if GFX_VER == 4 - wm.KernelStartPointer0 = - instruction_state_address(batch, params->wm_prog_kernel); - wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0); -#else - wm.KernelStartPointer0 = params->wm_prog_kernel + - brw_wm_prog_data_prog_offset(prog_data, wm, 0); - wm.KernelStartPointer1 = params->wm_prog_kernel + - brw_wm_prog_data_prog_offset(prog_data, wm, 1); - wm.KernelStartPointer2 = params->wm_prog_kernel + - brw_wm_prog_data_prog_offset(prog_data, wm, 2); - wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0); - wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(prog_data, wm, 1); - wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(prog_data, wm, 2); -#endif - } - - wm.MaximumNumberofThreads = - batch->blorp->compiler->devinfo->max_wm_threads - 1; - } - - return dynamic_state_address(batch, offset); -} - -static struct blorp_address -blorp_emit_color_calc_state(struct blorp_batch *batch) -{ - uint32_t cc_viewport = blorp_emit_cc_viewport(batch); - - uint32_t offset; - blorp_emit_dynamic(batch, GENX(COLOR_CALC_STATE), cc, 64, &offset) { - cc.CCViewportStatePointer = dynamic_state_address(batch, cc_viewport); - } - - return dynamic_state_address(batch, offset); -} - -static void -blorp_emit_pipeline(struct blorp_batch *batch, - const struct blorp_params *params) -{ - assert(batch->blorp->driver_ctx == batch->driver_batch); - struct brw_context *brw = batch->driver_batch; - - emit_urb_config(batch, params, NULL); - - blorp_emit(batch, GENX(3DSTATE_PIPELINED_POINTERS), pp) { - pp.PointertoVSState = blorp_emit_vs_state(batch); - pp.GSEnable = false; - pp.ClipEnable = false; - pp.PointertoSFState = blorp_emit_sf_state(batch, params); - pp.PointertoWMState = blorp_emit_wm_state(batch, params); - pp.PointertoColorCalcState = blorp_emit_color_calc_state(batch); - } - - brw_upload_urb_fence(brw); - - blorp_emit(batch, GENX(CS_URB_STATE), curb); - blorp_emit(batch, GENX(CONSTANT_BUFFER), curb); -} diff --git a/src/mesa/drivers/dri/i965/gfx6_clip_state.c b/src/mesa/drivers/dri/i965/gfx6_clip_state.c deleted file mode 100644 index 8e3fae7..0000000 --- a/src/mesa/drivers/dri/i965/gfx6_clip_state.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright © 2009 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
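The dynamic_state_address/instruction_state_address helpers above express the gfx4/5 indirection model: unit state (VS, SF, WM, color calc) is packed into a state buffer and referenced by 3DSTATE_PIPELINED_POINTERS as (buffer, offset) pairs rather than being embedded in the command stream. A toy version of that base-plus-offset resolution, with plain host memory standing in for a BO:

    #include <assert.h>
    #include <stdint.h>

    struct address { const uint8_t *buffer; uint32_t offset; };

    /* Resolve a (buffer, offset) state reference the way the hardware would. */
    static const uint8_t *resolve(struct address a) { return a.buffer + a.offset; }

    int main(void)
    {
       uint8_t state_bo[256] = {0};     /* stand-in for brw->batch.state.bo */
       state_bo[64] = 0xab;             /* packed unit state placed at offset 64 */
       struct address vs_state = { state_bo, 64 };
       assert(*resolve(vs_state) == 0xab);
       return 0;
    }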
- * - * Authors: - * Eric Anholt - * - */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "compiler/brw_eu_defines.h" -#include "brw_util.h" -#include "brw_batch.h" -#include "main/fbobject.h" -#include "main/framebuffer.h" - -bool -brw_is_drawing_points(const struct brw_context *brw) -{ - /* Determine if the primitives *reaching the SF* are points */ - /* _NEW_POLYGON */ - if (brw->ctx.Polygon.FrontMode == GL_POINT || - brw->ctx.Polygon.BackMode == GL_POINT) { - return true; - } - - if (brw->gs.base.prog_data) { - /* BRW_NEW_GS_PROG_DATA */ - return brw_gs_prog_data(brw->gs.base.prog_data)->output_topology == - _3DPRIM_POINTLIST; - } else if (brw->tes.base.prog_data) { - /* BRW_NEW_TES_PROG_DATA */ - return brw_tes_prog_data(brw->tes.base.prog_data)->output_topology == - BRW_TESS_OUTPUT_TOPOLOGY_POINT; - } else { - /* BRW_NEW_PRIMITIVE */ - return brw->primitive == _3DPRIM_POINTLIST; - } -} - -bool -brw_is_drawing_lines(const struct brw_context *brw) -{ - /* Determine if the primitives *reaching the SF* are points */ - /* _NEW_POLYGON */ - if (brw->ctx.Polygon.FrontMode == GL_LINE || - brw->ctx.Polygon.BackMode == GL_LINE) { - return true; - } - - if (brw->gs.base.prog_data) { - /* BRW_NEW_GS_PROG_DATA */ - return brw_gs_prog_data(brw->gs.base.prog_data)->output_topology == - _3DPRIM_LINESTRIP; - } else if (brw->tes.base.prog_data) { - /* BRW_NEW_TES_PROG_DATA */ - return brw_tes_prog_data(brw->tes.base.prog_data)->output_topology == - BRW_TESS_OUTPUT_TOPOLOGY_LINE; - } else { - /* BRW_NEW_PRIMITIVE */ - switch (brw->primitive) { - case _3DPRIM_LINELIST: - case _3DPRIM_LINESTRIP: - case _3DPRIM_LINELOOP: - return true; - } - } - return false; -} diff --git a/src/mesa/drivers/dri/i965/gfx6_constant_state.c b/src/mesa/drivers/dri/i965/gfx6_constant_state.c deleted file mode 100644 index 1f0e9fb..0000000 --- a/src/mesa/drivers/dri/i965/gfx6_constant_state.c +++ /dev/null @@ -1,361 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "brw_context.h" -#include "brw_cs.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_program.h" -#include "brw_batch.h" -#include "brw_buffer_objects.h" -#include "program/prog_parameter.h" -#include "main/shaderapi.h" - -static uint32_t -f_as_u32(float f) -{ - union fi fi = { .f = f }; - return fi.ui; -} - -static uint32_t -brw_param_value(struct brw_context *brw, - const struct gl_program *prog, - const struct brw_stage_state *stage_state, - uint32_t param) -{ - struct gl_context *ctx = &brw->ctx; - - switch (BRW_PARAM_DOMAIN(param)) { - case BRW_PARAM_DOMAIN_BUILTIN: - if (param == BRW_PARAM_BUILTIN_ZERO) { - return 0; - } else if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(param)) { - gl_clip_plane *clip_planes = brw_select_clip_planes(ctx); - unsigned idx = BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(param); - unsigned comp = BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(param); - return ((uint32_t *)clip_planes[idx])[comp]; - } else if (param >= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X && - param <= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W) { - unsigned i = param - BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X; - return f_as_u32(ctx->TessCtrlProgram.patch_default_outer_level[i]); - } else if (param == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X) { - return f_as_u32(ctx->TessCtrlProgram.patch_default_inner_level[0]); - } else if (param == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y) { - return f_as_u32(ctx->TessCtrlProgram.patch_default_inner_level[1]); - } else if (param >= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X && - param <= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Z) { - unsigned i = param - BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X; - return brw->compute.group_size[i]; - } else { - unreachable("Invalid param builtin"); - } - - case BRW_PARAM_DOMAIN_PARAMETER: { - unsigned idx = BRW_PARAM_PARAMETER_IDX(param); - unsigned offset = prog->Parameters->Parameters[idx].ValueOffset; - unsigned comp = BRW_PARAM_PARAMETER_COMP(param); - assert(idx < prog->Parameters->NumParameters); - return prog->Parameters->ParameterValues[offset + comp].u; - } - - case BRW_PARAM_DOMAIN_UNIFORM: { - unsigned idx = BRW_PARAM_UNIFORM_IDX(param); - assert(idx < prog->sh.data->NumUniformDataSlots); - return prog->sh.data->UniformDataSlots[idx].u; - } - - case BRW_PARAM_DOMAIN_IMAGE: { - unsigned idx = BRW_PARAM_IMAGE_IDX(param); - unsigned offset = BRW_PARAM_IMAGE_OFFSET(param); - assert(offset < ARRAY_SIZE(stage_state->image_param)); - return ((uint32_t *)&stage_state->image_param[idx])[offset]; - } - - default: - unreachable("Invalid param domain"); - } -} - - -void -brw_populate_constant_data(struct brw_context *brw, - const struct gl_program *prog, - const struct brw_stage_state *stage_state, - void *void_dst, - const uint32_t *param, - unsigned nr_params) -{ - uint32_t *dst = void_dst; - for (unsigned i = 0; i < nr_params; i++) - dst[i] = brw_param_value(brw, prog, stage_state, param[i]); -} - - -/** - * Creates a streamed BO containing the push constants for the VS or GS on - * gfx6+. - * - * Push constants are constant values (such as GLSL uniforms) that are - * pre-loaded into a shader stage's register space at thread spawn time. - * - * Not all GLSL uniforms will be uploaded as push constants: The hardware has - * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be - * uploaded as push constants, while GL 4.4 requires at least 1024 components - * to be usable for the VS. Plus, currently we always use pull constants - * instead of push constants when doing variable-index array access. 
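brw_param_value above decodes a tagged 32-bit handle: a domain tag selects builtin, parameter, uniform, or image storage, and the remaining bits index into it. The sketch below uses a hypothetical bit layout purely for illustration; the real BRW_PARAM_* encoding lives in the driver's headers and differs in detail:

    #include <assert.h>
    #include <stdint.h>

    enum param_domain { DOMAIN_BUILTIN, DOMAIN_PARAMETER, DOMAIN_UNIFORM, DOMAIN_IMAGE };

    /* Hypothetical packing: domain in the top byte, index and component below. */
    #define MAKE_PARAM(d, idx, comp) (((uint32_t)(d) << 24) | ((idx) << 2) | (comp))
    #define PARAM_DOMAIN(p)          ((enum param_domain)((p) >> 24))
    #define PARAM_IDX(p)             (((p) >> 2) & 0x3fffff)
    #define PARAM_COMP(p)            ((p) & 0x3)

    int main(void)
    {
       uint32_t p = MAKE_PARAM(DOMAIN_PARAMETER, 5, 2);  /* parameter 5, .z */
       assert(PARAM_DOMAIN(p) == DOMAIN_PARAMETER);
       assert(PARAM_IDX(p) == 5 && PARAM_COMP(p) == 2);
       return 0;
    }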
- * - * See brw_curbe.c for the equivalent gfx4/5 code. - */ -void -gfx6_upload_push_constants(struct brw_context *brw, - const struct gl_program *prog, - const struct brw_stage_prog_data *prog_data, - struct brw_stage_state *stage_state) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - - bool active = prog_data && - (stage_state->stage != MESA_SHADER_TESS_CTRL || - brw->programs[MESA_SHADER_TESS_EVAL]); - - if (active) - _mesa_shader_write_subroutine_indices(ctx, stage_state->stage); - - if (!active || prog_data->nr_params == 0) { - stage_state->push_const_size = 0; - } else { - /* Updates the ParameterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. - */ - /* XXX: Should this happen somewhere before to get our state flag set? */ - if (prog) - _mesa_load_state_parameters(ctx, prog->Parameters); - - int i; - const int size = prog_data->nr_params * sizeof(gl_constant_value); - gl_constant_value *param; - if (devinfo->verx10 >= 75) { - param = brw_upload_space(&brw->upload, size, 32, - &stage_state->push_const_bo, - &stage_state->push_const_offset); - } else { - param = brw_state_batch(brw, size, 32, - &stage_state->push_const_offset); - } - - STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); - - /* _NEW_PROGRAM_CONSTANTS - * - * Also _NEW_TRANSFORM -- we may reference clip planes other than as a - * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS - * wouldn't be set for them. - */ - brw_populate_constant_data(brw, prog, stage_state, param, - prog_data->param, - prog_data->nr_params); - - if (0) { - fprintf(stderr, "%s constants:\n", - _mesa_shader_stage_to_string(stage_state->stage)); - for (i = 0; i < prog_data->nr_params; i++) { - if ((i & 7) == 0) - fprintf(stderr, "g%d: ", - prog_data->dispatch_grf_start_reg + i / 8); - fprintf(stderr, "%8f ", param[i].f); - if ((i & 7) == 7) - fprintf(stderr, "\n"); - } - if ((i & 7) != 0) - fprintf(stderr, "\n"); - fprintf(stderr, "\n"); - } - - stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8; - /* We can only push 32 registers of constants at a time. */ - - /* From the SNB PRM (vol2, part 1, section 3.2.1.4): 3DSTATE_CONSTANT_VS: - * - * "The sum of all four read length fields (each incremented to - * represent the actual read length) must be less than or equal to - * 32" - * - * From the IVB PRM (vol2, part 1, section 3.2.1.3): 3DSTATE_CONSTANT_VS: - * - * "The sum of all four read length fields must be less than or - * equal to the size of 64" - * - * The other shader stages all match the VS's limits. - */ - assert(stage_state->push_const_size <= 32); - } - - stage_state->push_constants_dirty = true; -} - - -/** - * Creates a temporary BO containing the pull constant data for the shader - * stage, and the SURFACE_STATE struct that points at it. - * - * Pull constants are GLSL uniforms (and other constant data) beyond what we - * could fit as push constants, or that have variable-index array access - * (which is easiest to support using pull constants, and avoids filling - * register space with mostly-unused data). - * - * Compare this path to brw_curbe.c for gfx4/5 push constants, and - * gfx6_vs_state.c for gfx6+ push constants.
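A worked example of the sizing above: push_const_size counts 256-bit registers holding eight 32-bit params each, so the register count is ALIGN(nr_params, 8) / 8, capped at 32 by the PRM rule quoted in the comment:

    #include <assert.h>

    #define ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))

    /* Registers needed to push nr_params 32-bit values, 8 per register. */
    static int push_const_regs(int nr_params)
    {
       return ALIGN(nr_params, 8) / 8;
    }

    int main(void)
    {
       assert(push_const_regs(20) == 3);    /* 20 floats round up to 3 registers */
       assert(push_const_regs(256) == 32);  /* 256 floats is exactly the limit */
       return 0;
    }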
- */ -void -brw_upload_pull_constants(struct brw_context *brw, - GLbitfield64 brw_new_constbuf, - const struct gl_program *prog, - struct brw_stage_state *stage_state, - const struct brw_stage_prog_data *prog_data) -{ - unsigned i; - uint32_t surf_index = prog_data->binding_table.pull_constants_start; - - if (!prog_data->nr_pull_params) { - if (stage_state->surf_offset[surf_index]) { - stage_state->surf_offset[surf_index] = 0; - brw->ctx.NewDriverState |= brw_new_constbuf; - } - return; - } - - /* Updates the ParameterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. - */ - _mesa_load_state_parameters(&brw->ctx, prog->Parameters); - - /* BRW_NEW_*_PROG_DATA | _NEW_PROGRAM_CONSTANTS */ - uint32_t size = prog_data->nr_pull_params * 4; - struct brw_bo *const_bo = NULL; - uint32_t const_offset; - gl_constant_value *constants = brw_upload_space(&brw->upload, size, 64, - &const_bo, &const_offset); - - STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); - - brw_populate_constant_data(brw, prog, stage_state, constants, - prog_data->pull_param, - prog_data->nr_pull_params); - - if (0) { - for (i = 0; i < ALIGN(prog_data->nr_pull_params, 4) / 4; i++) { - const gl_constant_value *row = &constants[i * 4]; - fprintf(stderr, "const surface %3d: %4.3f %4.3f %4.3f %4.3f\n", - i, row[0].f, row[1].f, row[2].f, row[3].f); - } - } - - brw_emit_buffer_surface_state(brw, &stage_state->surf_offset[surf_index], - const_bo, const_offset, - ISL_FORMAT_R32G32B32A32_FLOAT, - size, 1, 0); - - brw_bo_unreference(const_bo); - - brw->ctx.NewDriverState |= brw_new_constbuf; -} - -/** - * Creates a region containing the push constants for the CS on gfx7+. - * - * Push constants are constant values (such as GLSL uniforms) that are - * pre-loaded into a shader stage's register space at thread spawn time. - * - * For other stages, see brw_curbe.c:brw_upload_constant_buffer for the - * equivalent gfx4/5 code and gfx6_vs_state.c:gfx6_upload_push_constants for - * gfx6+. - */ -void -brw_upload_cs_push_constants(struct brw_context *brw, - const struct gl_program *prog, - const struct brw_cs_prog_data *cs_prog_data, - struct brw_stage_state *stage_state) -{ - struct gl_context *ctx = &brw->ctx; - const struct brw_stage_prog_data *prog_data = - (struct brw_stage_prog_data*) cs_prog_data; - - /* Updates the ParameterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. - */ - /* XXX: Should this happen somewhere before to get our state flag set?
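On the sizing above: the pull buffer holds nr_pull_params 32-bit values (size = nr_pull_params * 4 bytes) and is exposed as an R32G32B32A32_FLOAT buffer surface, so each 16-byte texel covers one vec4 of constants. A quick check of that arithmetic with an illustrative count:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
       unsigned nr_pull_params = 18;          /* e.g. 18 dwords of constants */
       uint32_t size = nr_pull_params * 4;    /* buffer size in bytes, as above */
       unsigned full_vec4s = size / 16;       /* complete RGBA32F texels */
       assert(size == 72 && full_vec4s == 4); /* 4 full vec4s + 2 trailing dwords */
       return 0;
    }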
*/ - _mesa_load_state_parameters(ctx, prog->Parameters); - - const struct brw_cs_dispatch_info dispatch = - brw_cs_get_dispatch_info(&brw->screen->devinfo, cs_prog_data, - brw->compute.group_size); - const unsigned push_const_size = - brw_cs_push_const_total_size(cs_prog_data, dispatch.threads); - - if (push_const_size == 0) { - stage_state->push_const_size = 0; - return; - } - - - uint32_t *param = - brw_state_batch(brw, ALIGN(push_const_size, 64), - 64, &stage_state->push_const_offset); - assert(param); - - STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); - - if (cs_prog_data->push.cross_thread.size > 0) { - uint32_t *param_copy = param; - for (unsigned i = 0; - i < cs_prog_data->push.cross_thread.dwords; - i++) { - assert(prog_data->param[i] != BRW_PARAM_BUILTIN_SUBGROUP_ID); - param_copy[i] = brw_param_value(brw, prog, stage_state, - prog_data->param[i]); - } - } - - if (cs_prog_data->push.per_thread.size > 0) { - for (unsigned t = 0; t < dispatch.threads; t++) { - unsigned dst = - 8 * (cs_prog_data->push.per_thread.regs * t + - cs_prog_data->push.cross_thread.regs); - unsigned src = cs_prog_data->push.cross_thread.dwords; - for ( ; src < prog_data->nr_params; src++, dst++) { - if (prog_data->param[src] == BRW_PARAM_BUILTIN_SUBGROUP_ID) { - param[dst] = t; - } else { - param[dst] = brw_param_value(brw, prog, stage_state, - prog_data->param[src]); - } - } - } - } - - stage_state->push_const_size = - cs_prog_data->push.cross_thread.regs + - cs_prog_data->push.per_thread.regs; -} diff --git a/src/mesa/drivers/dri/i965/gfx6_multisample_state.c b/src/mesa/drivers/dri/i965/gfx6_multisample_state.c deleted file mode 100644 index b5f2c3b..0000000 --- a/src/mesa/drivers/dri/i965/gfx6_multisample_state.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
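A worked example of the layout built above: the cross-thread block comes first and is shared, then each thread gets its own per-thread block, so thread t's data starts at dword 8 * (per_thread.regs * t + cross_thread.regs), and its copy of BRW_PARAM_BUILTIN_SUBGROUP_ID is patched to t. Register counts below are illustrative:

    #include <assert.h>

    /* Dword index where thread `t`'s per-thread block begins, mirroring the
     * dst computation in brw_upload_cs_push_constants above. */
    static unsigned per_thread_start(unsigned cross_regs, unsigned per_regs,
                                     unsigned t)
    {
       return 8 * (per_regs * t + cross_regs);
    }

    int main(void)
    {
       /* 2 shared cross-thread registers, then 1 register per thread. */
       assert(per_thread_start(2, 1, 0) == 16);  /* right after the shared block */
       assert(per_thread_start(2, 1, 3) == 40);  /* 8 * (1*3 + 2) */
       return 0;
    }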
- */ - -#include "brw_batch.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_multisample_state.h" -#include "main/framebuffer.h" - -void -gfx6_get_sample_position(struct gl_context *ctx, - struct gl_framebuffer *fb, - GLuint index, GLfloat *result) -{ - uint8_t bits; - - switch (_mesa_geometric_samples(fb)) { - case 1: - result[0] = result[1] = 0.5f; - return; - case 2: - bits = brw_multisample_positions_1x_2x >> (8 * index); - break; - case 4: - bits = brw_multisample_positions_4x >> (8 * index); - break; - case 8: - bits = brw_multisample_positions_8x[index >> 2] >> (8 * (index & 3)); - break; - case 16: - bits = brw_multisample_positions_16x[index >> 2] >> (8 * (index & 3)); - break; - default: - unreachable("Not implemented"); - } - - /* Convert from U0.4 back to a floating point coordinate. */ - result[0] = ((bits >> 4) & 0xf) / 16.0f; - result[1] = (bits & 0xf) / 16.0f; -} diff --git a/src/mesa/drivers/dri/i965/gfx6_queryobj.c b/src/mesa/drivers/dri/i965/gfx6_queryobj.c deleted file mode 100644 index 85db77f..0000000 --- a/src/mesa/drivers/dri/i965/gfx6_queryobj.c +++ /dev/null @@ -1,560 +0,0 @@ -/* - * Copyright © 2008 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * Kenneth Graunke - */ - -/** @file gfx6_queryobj.c - * - * Support for query objects (GL_ARB_occlusion_query, GL_ARB_timer_query, - * GL_EXT_transform_feedback, and friends) on platforms that support - * hardware contexts (Gfx6+). - */ -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "perf/intel_perf_regs.h" -#include "brw_batch.h" -#include "brw_buffer_objects.h" - -static inline void -set_query_availability(struct brw_context *brw, struct brw_query_object *query, - bool available) -{ - /* For platforms that support ARB_query_buffer_object, we write the - * query availability for "pipelined" queries. - * - * Most counter snapshots are written by the command streamer, by - * doing a CS stall and then MI_STORE_REGISTER_MEM. For these - * counters, the CS stall guarantees that the results will be - * available when subsequent CS commands run. So we don't need to - * do any additional tracking. - * - * Other counters (occlusion queries and timestamp) are written by - * PIPE_CONTROL, without a CS stall. This means that we can't be - * sure whether the writes have landed yet or not. 
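Decoding one of the packed position bytes above by hand: x lives in the high nibble and y in the low nibble, each as U0.4 fixed point (sixteenths of a pixel). The byte value here is made up for illustration, not taken from the real position tables:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
       uint8_t bits = 0xC4;                    /* example byte: x = 12, y = 4 */
       float x = ((bits >> 4) & 0xf) / 16.0f;  /* high nibble -> 12/16 = 0.75 */
       float y = (bits & 0xf) / 16.0f;         /* low nibble  ->  4/16 = 0.25 */
       assert(x == 0.75f && y == 0.25f);
       return 0;
    }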
Performing a - * PIPE_CONTROL with an immediate write will synchronize with - * those earlier writes, so we write 1 when the value has landed. - */ - if (brw->ctx.Extensions.ARB_query_buffer_object && - brw_is_query_pipelined(query)) { - unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE; - - if (available) { - /* Order available *after* the query results. */ - flags |= PIPE_CONTROL_FLUSH_ENABLE; - } else { - /* Make it unavailable *before* any pipelined reads. */ - flags |= PIPE_CONTROL_CS_STALL; - } - - brw_emit_pipe_control_write(brw, flags, - query->bo, 2 * sizeof(uint64_t), - available); - } -} - -static void -write_primitives_generated(struct brw_context *brw, - struct brw_bo *query_bo, int stream, int idx) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - brw_emit_mi_flush(brw); - - if (devinfo->ver >= 7 && stream > 0) { - brw_store_register_mem64(brw, query_bo, - GFX7_SO_PRIM_STORAGE_NEEDED(stream), - idx * sizeof(uint64_t)); - } else { - brw_store_register_mem64(brw, query_bo, CL_INVOCATION_COUNT, - idx * sizeof(uint64_t)); - } -} - -static void -write_xfb_primitives_written(struct brw_context *brw, - struct brw_bo *bo, int stream, int idx) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - brw_emit_mi_flush(brw); - - if (devinfo->ver >= 7) { - brw_store_register_mem64(brw, bo, GFX7_SO_NUM_PRIMS_WRITTEN(stream), - idx * sizeof(uint64_t)); - } else { - brw_store_register_mem64(brw, bo, GFX6_SO_NUM_PRIMS_WRITTEN, - idx * sizeof(uint64_t)); - } -} - -static void -write_xfb_overflow_streams(struct gl_context *ctx, - struct brw_bo *bo, int stream, int count, - int idx) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - brw_emit_mi_flush(brw); - - for (int i = 0; i < count; i++) { - int w_idx = 4 * i + idx; - int g_idx = 4 * i + idx + 2; - - if (devinfo->ver >= 7) { - brw_store_register_mem64(brw, bo, - GFX7_SO_NUM_PRIMS_WRITTEN(stream + i), - g_idx * sizeof(uint64_t)); - brw_store_register_mem64(brw, bo, - GFX7_SO_PRIM_STORAGE_NEEDED(stream + i), - w_idx * sizeof(uint64_t)); - } else { - brw_store_register_mem64(brw, bo, - GFX6_SO_NUM_PRIMS_WRITTEN, - g_idx * sizeof(uint64_t)); - brw_store_register_mem64(brw, bo, - GFX6_SO_PRIM_STORAGE_NEEDED, - w_idx * sizeof(uint64_t)); - } - } -} - -static bool -check_xfb_overflow_streams(uint64_t *results, int count) -{ - bool overflow = false; - - for (int i = 0; i < count; i++) { - uint64_t *result_i = &results[4 * i]; - - if ((result_i[3] - result_i[2]) != (result_i[1] - result_i[0])) { - overflow = true; - break; - } - } - - return overflow; -} - -static inline int -pipeline_target_to_index(int target) -{ - if (target == GL_GEOMETRY_SHADER_INVOCATIONS) - return MAX_PIPELINE_STATISTICS - 1; - else - return target - GL_VERTICES_SUBMITTED_ARB; -} - -static void -emit_pipeline_stat(struct brw_context *brw, struct brw_bo *bo, - int stream, int target, int idx) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* One source of confusion is the tessellation shader statistics. The - * hardware has no statistics specific to the TE unit. Ideally we could have - * the HS primitives for TESS_CONTROL_SHADER_PATCHES_ARB, and the DS - * invocations as the register for TESS_EVALUATION_SHADER_INVOCATIONS_ARB. - * Unfortunately we don't have HS primitives, we only have HS invocations.
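The BO layout implied above: a begin snapshot at index 0, an end snapshot at index 1, and the ARB_query_buffer_object availability word at byte offset 2 * sizeof(uint64_t). Reading it back reduces to an end-minus-start delta; a sketch over plain memory rather than a mapped BO:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
       uint64_t bo[3] = { 100, 142, 1 };  /* begin, end, availability word */
       uint64_t result = bo[1] - bo[0];   /* counter queries are deltas */
       int available = bo[2] != 0;        /* written at 2 * sizeof(uint64_t) */
       assert(result == 42 && available);
       return 0;
    }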
- */ - - /* Everything except GEOMETRY_SHADER_INVOCATIONS can be kept in a simple - * lookup table - */ - static const uint32_t target_to_register[] = { - IA_VERTICES_COUNT, /* VERTICES_SUBMITTED */ - IA_PRIMITIVES_COUNT, /* PRIMITIVES_SUBMITTED */ - VS_INVOCATION_COUNT, /* VERTEX_SHADER_INVOCATIONS */ - HS_INVOCATION_COUNT, /* TESS_CONTROL_SHADER_PATCHES */ - DS_INVOCATION_COUNT, /* TESS_EVALUATION_SHADER_INVOCATIONS */ - GS_PRIMITIVES_COUNT, /* GEOMETRY_SHADER_PRIMITIVES_EMITTED */ - PS_INVOCATION_COUNT, /* FRAGMENT_SHADER_INVOCATIONS */ - CS_INVOCATION_COUNT, /* COMPUTE_SHADER_INVOCATIONS */ - CL_INVOCATION_COUNT, /* CLIPPING_INPUT_PRIMITIVES */ - CL_PRIMITIVES_COUNT, /* CLIPPING_OUTPUT_PRIMITIVES */ - GS_INVOCATION_COUNT /* This one is special... */ - }; - STATIC_ASSERT(ARRAY_SIZE(target_to_register) == MAX_PIPELINE_STATISTICS); - uint32_t reg = target_to_register[pipeline_target_to_index(target)]; - /* Gfx6 GS code counts full primitives, that is, it won't count individual - * triangles in a triangle strip. Use CL_INVOCATION_COUNT for that. - */ - if (devinfo->ver == 6 && target == GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB) - reg = CL_INVOCATION_COUNT; - assert(reg != 0); - - /* Emit a flush to make sure various parts of the pipeline are complete and - * we get an accurate value - */ - brw_emit_mi_flush(brw); - - brw_store_register_mem64(brw, bo, reg, idx * sizeof(uint64_t)); -} - - -/** - * Wait on the query object's BO and calculate the final result. - */ -static void -gfx6_queryobj_get_results(struct gl_context *ctx, - struct brw_query_object *query) -{ - struct brw_context *brw = brw_context(ctx); - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (query->bo == NULL) - return; - - uint64_t *results = brw_bo_map(brw, query->bo, MAP_READ); - switch (query->Base.Target) { - case GL_TIME_ELAPSED: - /* The query BO contains the starting and ending timestamps. - * Subtract the two and convert to nanoseconds. - */ - query->Base.Result = brw_raw_timestamp_delta(brw, results[0], results[1]); - query->Base.Result = intel_device_info_timebase_scale(devinfo, query->Base.Result); - break; - - case GL_TIMESTAMP: - /* The query BO contains a single timestamp value in results[0]. */ - query->Base.Result = intel_device_info_timebase_scale(devinfo, results[0]); - - /* Ensure the scaled timestamp overflows according to - * GL_QUERY_COUNTER_BITS - */ - query->Base.Result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1; - break; - - case GL_SAMPLES_PASSED_ARB: - /* We need to use += rather than = here since some BLT-based operations - * may have added additional samples to our occlusion query value. 
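The GL_TIMESTAMP masking above makes the scaled value wrap at the advertised GL_QUERY_COUNTER_BITS width. The mask arithmetic in isolation, with an assumed (not necessarily the driver's) 36-bit counter width:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
       unsigned timestamp_bits = 36;                 /* assumed counter width */
       uint64_t mask = (1ull << timestamp_bits) - 1;
       uint64_t t = (1ull << 36) + 5;                /* value past the width */
       assert((t & mask) == 5);                      /* wraps as GL requires */
       return 0;
    }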
- */ - query->Base.Result += results[1] - results[0]; - break; - - case GL_ANY_SAMPLES_PASSED: - case GL_ANY_SAMPLES_PASSED_CONSERVATIVE: - if (results[0] != results[1]) - query->Base.Result = true; - break; - - case GL_PRIMITIVES_GENERATED: - case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: - case GL_VERTICES_SUBMITTED_ARB: - case GL_PRIMITIVES_SUBMITTED_ARB: - case GL_VERTEX_SHADER_INVOCATIONS_ARB: - case GL_GEOMETRY_SHADER_INVOCATIONS: - case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: - case GL_CLIPPING_INPUT_PRIMITIVES_ARB: - case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: - case GL_COMPUTE_SHADER_INVOCATIONS_ARB: - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: - query->Base.Result = results[1] - results[0]; - break; - - case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: - query->Base.Result = check_xfb_overflow_streams(results, 1); - break; - - case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: - query->Base.Result = check_xfb_overflow_streams(results, MAX_VERTEX_STREAMS); - break; - - case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: - query->Base.Result = (results[1] - results[0]); - /* Implement the "WaDividePSInvocationCountBy4:HSW,BDW" workaround: - * "Invocation counter is 4 times actual. WA: SW to divide HW reported - * PS Invocations value by 4." - * - * Prior to Haswell, invocation count was counted by the WM, and it - * buggily counted invocations in units of subspans (2x2 unit). To get the - * correct value, the CS multiplied this by 4. With HSW the logic moved - * and correctly emitted the number of pixel shader invocations, but - * whoever moved it forgot to undo the multiply by 4. - */ - if (devinfo->ver == 8 || devinfo->verx10 == 75) - query->Base.Result /= 4; - break; - - default: - unreachable("Unrecognized query target in brw_queryobj_get_results()"); - } - brw_bo_unmap(query->bo); - - /* Now that we've processed the data stored in the query's buffer object, - * we can release it. - */ - brw_bo_unreference(query->bo); - query->bo = NULL; - - query->Base.Ready = true; -} - -/** - * Driver hook for glBeginQuery(). - * - * Initializes driver structures and emits any GPU commands required to begin - * recording data for the query. - */ -static void -gfx6_begin_query(struct gl_context *ctx, struct gl_query_object *q) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_query_object *query = (struct brw_query_object *)q; - - /* Since we're starting a new query, we need to throw away old results. */ - brw_bo_unreference(query->bo); - query->bo = - brw_bo_alloc(brw->bufmgr, "query results", 4096, BRW_MEMZONE_OTHER); - - /* For ARB_query_buffer_object: The result is not available */ - set_query_availability(brw, query, false); - - switch (query->Base.Target) { - case GL_TIME_ELAPSED: - /* For timestamp queries, we record the starting time right away so that - * we measure the full time between BeginQuery and EndQuery. There's - * some debate about whether this is the right thing to do. Our decision - * is based on the following text from the ARB_timer_query extension: - * - * "(5) Should the extension measure total time elapsed between the full - * completion of the BeginQuery and EndQuery commands, or just time - * spent in the graphics library? - * - * RESOLVED: This extension will measure the total time elapsed - * between the full completion of these commands. Future extensions - * may implement a query to determine time elapsed at different stages - * of the graphics pipeline." - * - * We write a starting timestamp now (at index 0).
At EndQuery() time, - * we'll write a second timestamp (at index 1), and subtract the two to - * obtain the time elapsed. Notably, this includes time elapsed while - * the system was doing other work, such as running other applications. - */ - brw_write_timestamp(brw, query->bo, 0); - break; - - case GL_ANY_SAMPLES_PASSED: - case GL_ANY_SAMPLES_PASSED_CONSERVATIVE: - case GL_SAMPLES_PASSED_ARB: - brw_write_depth_count(brw, query->bo, 0); - break; - - case GL_PRIMITIVES_GENERATED: - write_primitives_generated(brw, query->bo, query->Base.Stream, 0); - if (query->Base.Stream == 0) - ctx->NewDriverState |= BRW_NEW_RASTERIZER_DISCARD; - break; - - case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: - write_xfb_primitives_written(brw, query->bo, query->Base.Stream, 0); - break; - - case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: - write_xfb_overflow_streams(ctx, query->bo, query->Base.Stream, 1, 0); - break; - - case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: - write_xfb_overflow_streams(ctx, query->bo, 0, MAX_VERTEX_STREAMS, 0); - break; - - case GL_VERTICES_SUBMITTED_ARB: - case GL_PRIMITIVES_SUBMITTED_ARB: - case GL_VERTEX_SHADER_INVOCATIONS_ARB: - case GL_GEOMETRY_SHADER_INVOCATIONS: - case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: - case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: - case GL_CLIPPING_INPUT_PRIMITIVES_ARB: - case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: - case GL_COMPUTE_SHADER_INVOCATIONS_ARB: - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: - emit_pipeline_stat(brw, query->bo, query->Base.Stream, query->Base.Target, 0); - break; - - default: - unreachable("Unrecognized query target in brw_begin_query()"); - } -} - -/** - * Driver hook for glEndQuery(). - * - * Emits GPU commands to record a final query value, ending any data capturing. - * However, the final result isn't necessarily available until the GPU processes - * those commands. brw_queryobj_get_results() processes the captured data to - * produce the final result. 
- */ -static void -gfx6_end_query(struct gl_context *ctx, struct gl_query_object *q) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_query_object *query = (struct brw_query_object *)q; - - switch (query->Base.Target) { - case GL_TIME_ELAPSED: - brw_write_timestamp(brw, query->bo, 1); - break; - - case GL_ANY_SAMPLES_PASSED: - case GL_ANY_SAMPLES_PASSED_CONSERVATIVE: - case GL_SAMPLES_PASSED_ARB: - brw_write_depth_count(brw, query->bo, 1); - break; - - case GL_PRIMITIVES_GENERATED: - write_primitives_generated(brw, query->bo, query->Base.Stream, 1); - if (query->Base.Stream == 0) - ctx->NewDriverState |= BRW_NEW_RASTERIZER_DISCARD; - break; - - case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: - write_xfb_primitives_written(brw, query->bo, query->Base.Stream, 1); - break; - - case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: - write_xfb_overflow_streams(ctx, query->bo, query->Base.Stream, 1, 1); - break; - - case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: - write_xfb_overflow_streams(ctx, query->bo, 0, MAX_VERTEX_STREAMS, 1); - break; - - /* calculate overflow here */ - case GL_VERTICES_SUBMITTED_ARB: - case GL_PRIMITIVES_SUBMITTED_ARB: - case GL_VERTEX_SHADER_INVOCATIONS_ARB: - case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: - case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: - case GL_COMPUTE_SHADER_INVOCATIONS_ARB: - case GL_CLIPPING_INPUT_PRIMITIVES_ARB: - case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: - case GL_GEOMETRY_SHADER_INVOCATIONS: - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: - emit_pipeline_stat(brw, query->bo, - query->Base.Stream, query->Base.Target, 1); - break; - - default: - unreachable("Unrecognized query target in brw_end_query()"); - } - - /* The current batch contains the commands to handle EndQuery(), - * but they won't actually execute until it is flushed. - */ - query->flushed = false; - - /* For ARB_query_buffer_object: The result is now available */ - set_query_availability(brw, query, true); -} - -/** - * Flush the batch if it still references the query object BO. - */ -static void -flush_batch_if_needed(struct brw_context *brw, struct brw_query_object *query) -{ - /* If the batch doesn't reference the BO, it must have been flushed - * (for example, due to being full). Record that it's been flushed. - */ - query->flushed = query->flushed || - !brw_batch_references(&brw->batch, query->bo); - - if (!query->flushed) - brw_batch_flush(brw); -} - -/** - * The WaitQuery() driver hook. - * - * Wait for a query result to become available and return it. This is the - * backing for glGetQueryObjectiv() with the GL_QUERY_RESULT pname. - */ -static void gfx6_wait_query(struct gl_context *ctx, struct gl_query_object *q) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_query_object *query = (struct brw_query_object *)q; - - /* If the application has requested the query result, but this batch is - * still contributing to it, flush it now to finish that work so the - * result will become available (eventually). - */ - flush_batch_if_needed(brw, query); - - gfx6_queryobj_get_results(ctx, query); -} - -/** - * The CheckQuery() driver hook. - * - * Checks whether a query result is ready yet. If not, flushes. - * This is the backing for glGetQueryObjectiv()'s QUERY_RESULT_AVAILABLE pname. 
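flush_batch_if_needed above captures a common pattern: only flush when the still-unsubmitted batch references the query's BO; if it does not, the commands already reached the kernel and flushing would be wasted work. A loose toy model of that decision (single pending BO, not the real batch tracking):

    #include <assert.h>
    #include <stdbool.h>

    struct toy_batch { const void *pending_bo; };  /* BO the batch still references */
    struct toy_query { bool flushed; };

    static bool batch_references(const struct toy_batch *b, const void *bo)
    { return b->pending_bo == bo; }

    static void flush_if_needed(struct toy_batch *b, struct toy_query *q,
                                const void *bo)
    {
       q->flushed = q->flushed || !batch_references(b, bo);
       if (!q->flushed) {
          b->pending_bo = NULL;  /* "submit" the batch */
          q->flushed = true;
       }
    }

    int main(void)
    {
       int bo;                        /* any address serves as a toy BO handle */
       struct toy_batch b = { &bo };
       struct toy_query q = { false };
       flush_if_needed(&b, &q, &bo);  /* batch references bo, so it gets flushed */
       assert(q.flushed && b.pending_bo == NULL);
       return 0;
    }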
- */ -static void gfx6_check_query(struct gl_context *ctx, struct gl_query_object *q) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_query_object *query = (struct brw_query_object *)q; - - /* If query->bo is NULL, we've already gathered the results - this is a - * redundant CheckQuery call. Ignore it. - */ - if (query->bo == NULL) - return; - - /* From the GL_ARB_occlusion_query spec: - * - * "Instead of allowing for an infinite loop, performing a - * QUERY_RESULT_AVAILABLE_ARB will perform a flush if the result is - * not ready yet on the first time it is queried. This ensures that - * the async query will return true in finite time. - */ - flush_batch_if_needed(brw, query); - - if (!brw_bo_busy(query->bo)) { - gfx6_queryobj_get_results(ctx, query); - } -} - -static void -gfx6_query_counter(struct gl_context *ctx, struct gl_query_object *q) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_query_object *query = (struct brw_query_object *)q; - brw_query_counter(ctx, q); - set_query_availability(brw, query, true); -} - -/* Initialize Gfx6+-specific query object functions. */ -void gfx6_init_queryobj_functions(struct dd_function_table *functions) -{ - functions->BeginQuery = gfx6_begin_query; - functions->EndQuery = gfx6_end_query; - functions->CheckQuery = gfx6_check_query; - functions->WaitQuery = gfx6_wait_query; - functions->QueryCounter = gfx6_query_counter; -} diff --git a/src/mesa/drivers/dri/i965/gfx6_sampler_state.c b/src/mesa/drivers/dri/i965/gfx6_sampler_state.c deleted file mode 100644 index 4a5481f..0000000 --- a/src/mesa/drivers/dri/i965/gfx6_sampler_state.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Eric Anholt - * - */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_batch.h" - -static void -upload_sampler_state_pointers(struct brw_context *brw) -{ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS << 16 | - VS_SAMPLER_STATE_CHANGE | - GS_SAMPLER_STATE_CHANGE | - PS_SAMPLER_STATE_CHANGE | - (4 - 2)); - OUT_BATCH(brw->vs.base.sampler_offset); /* VS */ - OUT_BATCH(brw->gs.base.sampler_offset); /* GS */ - OUT_BATCH(brw->wm.base.sampler_offset); - ADVANCE_BATCH(); -} - -const struct brw_tracked_state gfx6_sampler_state = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_SAMPLER_STATE_TABLE | - BRW_NEW_STATE_BASE_ADDRESS, - }, - .emit = upload_sampler_state_pointers, -}; diff --git a/src/mesa/drivers/dri/i965/gfx6_sol.c b/src/mesa/drivers/dri/i965/gfx6_sol.c deleted file mode 100644 index 56470da..0000000 --- a/src/mesa/drivers/dri/i965/gfx6_sol.c +++ /dev/null @@ -1,522 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** \file gfx6_sol.c - * - * Code to initialize the binding table entries used by transform feedback. 
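The (4 - 2) in the packet header above follows the usual command convention: the DWord Length field excludes the header's first two dwords, so a 4-dword packet encodes a length of 2. Illustrated with a made-up opcode value rather than the real _3DSTATE_SAMPLER_STATE_POINTERS bits:

    #include <assert.h>
    #include <stdint.h>

    /* Encode a command header: opcode in the high bits, (total dwords - 2)
     * in the low bits. Field positions here are illustrative only. */
    static uint32_t cmd_header(uint32_t opcode, uint32_t total_dwords)
    {
       return (opcode << 16) | (total_dwords - 2);
    }

    int main(void)
    {
       /* A 4-dword packet, like the sampler-pointers packet above. */
       assert((cmd_header(0x0201, 4) & 0xff) == 2);
       return 0;
    }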
- */ - -#include "main/bufferobj.h" -#include "main/macros.h" -#include "brw_context.h" -#include "brw_batch.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "main/transformfeedback.h" -#include "util/u_memory.h" - -static void -gfx6_update_sol_surfaces(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - bool xfb_active = _mesa_is_xfb_active_and_unpaused(ctx); - struct gl_transform_feedback_object *xfb_obj; - const struct gl_transform_feedback_info *linked_xfb_info = NULL; - - if (xfb_active) { - /* BRW_NEW_TRANSFORM_FEEDBACK */ - xfb_obj = ctx->TransformFeedback.CurrentObject; - linked_xfb_info = xfb_obj->program->sh.LinkedTransformFeedback; - } - - for (int i = 0; i < BRW_MAX_SOL_BINDINGS; ++i) { - const int surf_index = BRW_GFX6_SOL_BINDING_START + i; - if (xfb_active && i < linked_xfb_info->NumOutputs) { - unsigned buffer = linked_xfb_info->Outputs[i].OutputBuffer; - unsigned buffer_offset = - xfb_obj->Offset[buffer] / 4 + - linked_xfb_info->Outputs[i].DstOffset; - if (brw->programs[MESA_SHADER_GEOMETRY]) { - brw_update_sol_surface( - brw, xfb_obj->Buffers[buffer], - &brw->gs.base.surf_offset[surf_index], - linked_xfb_info->Outputs[i].NumComponents, - linked_xfb_info->Buffers[buffer].Stride, buffer_offset); - } else { - brw_update_sol_surface( - brw, xfb_obj->Buffers[buffer], - &brw->ff_gs.surf_offset[surf_index], - linked_xfb_info->Outputs[i].NumComponents, - linked_xfb_info->Buffers[buffer].Stride, buffer_offset); - } - } else { - if (!brw->programs[MESA_SHADER_GEOMETRY]) - brw->ff_gs.surf_offset[surf_index] = 0; - else - brw->gs.base.surf_offset[surf_index] = 0; - } - } - - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; -} - -const struct brw_tracked_state gfx6_sol_surface = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_TRANSFORM_FEEDBACK, - }, - .emit = gfx6_update_sol_surfaces, -}; - -/** - * Constructs the binding table for the WM surface state, which maps unit - * numbers to surface state objects. - */ -static void -brw_gs_upload_binding_table(struct brw_context *brw) -{ - uint32_t *bind; - struct gl_context *ctx = &brw->ctx; - const struct gl_program *prog; - bool need_binding_table = false; - - /* We have two scenarios here: - * 1) We are using a geometry shader only to implement transform feedback - * for a vertex shader (brw->programs[MESA_SHADER_GEOMETRY] == NULL). - * In this case, we only need surfaces for transform feedback in the - * GS stage. - * 2) We have a user-provided geometry shader. In this case we may need - * surfaces for transform feedback and/or other stuff, like textures, - * in the GS stage. - */ - - if (!brw->programs[MESA_SHADER_GEOMETRY]) { - /* BRW_NEW_VERTEX_PROGRAM */ - prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; - if (prog) { - /* Skip making a binding table if we don't have anything to put in it */ - const struct gl_transform_feedback_info *linked_xfb_info = - prog->sh.LinkedTransformFeedback; - need_binding_table = linked_xfb_info->NumOutputs > 0; - } - if (!need_binding_table) { - if (brw->ff_gs.bind_bo_offset != 0) { - brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS; - brw->ff_gs.bind_bo_offset = 0; - } - return; - } - - /* Might want to calculate nr_surfaces first, to avoid taking up so much - * space for the binding table. Anyway, in this case we know that we only - * use BRW_MAX_SOL_BINDINGS surfaces at most. 
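A note on the buffer_offset computation above: xfb_obj->Offset[] is a byte offset from glBindBufferRange, while the SOL surface wants dword units, hence the divide by 4 before adding the output's DstOffset (already in dwords):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
       uint32_t byte_offset = 64;  /* buffer binding offset, in bytes */
       uint32_t dst_offset = 2;    /* varying's dword slot within a vertex */
       uint32_t buffer_offset = byte_offset / 4 + dst_offset;  /* dwords */
       assert(buffer_offset == 18);
       return 0;
    }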
-       */
-      bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_MAX_SOL_BINDINGS,
-                             32, &brw->ff_gs.bind_bo_offset);
-
-      /* BRW_NEW_SURFACES */
-      memcpy(bind, brw->ff_gs.surf_offset,
-             BRW_MAX_SOL_BINDINGS * sizeof(uint32_t));
-   } else {
-      /* BRW_NEW_GEOMETRY_PROGRAM */
-      prog = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
-      if (prog) {
-         /* Skip making a binding table if we don't have anything to put in it */
-         struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
-         const struct gl_transform_feedback_info *linked_xfb_info =
-            prog->sh.LinkedTransformFeedback;
-         need_binding_table = linked_xfb_info->NumOutputs > 0 ||
-            prog_data->binding_table.size_bytes > 0;
-      }
-      if (!need_binding_table) {
-         if (brw->gs.base.bind_bo_offset != 0) {
-            brw->gs.base.bind_bo_offset = 0;
-            brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
-         }
-         return;
-      }
-
-      /* Might want to calculate nr_surfaces first, to avoid taking up so much
-       * space for the binding table.
-       */
-      bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_MAX_SURFACES,
-                             32, &brw->gs.base.bind_bo_offset);
-
-      /* BRW_NEW_SURFACES */
-      memcpy(bind, brw->gs.base.surf_offset,
-             BRW_MAX_SURFACES * sizeof(uint32_t));
-   }
-
-   brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
-}
-
-const struct brw_tracked_state gfx6_gs_binding_table = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_GEOMETRY_PROGRAM |
-             BRW_NEW_VERTEX_PROGRAM |
-             BRW_NEW_SURFACES,
-   },
-   .emit = brw_gs_upload_binding_table,
-};
-
-struct gl_transform_feedback_object *
-brw_new_transform_feedback(struct gl_context *ctx, GLuint name)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_transform_feedback_object *brw_obj =
-      CALLOC_STRUCT(brw_transform_feedback_object);
-   if (!brw_obj)
-      return NULL;
-
-   _mesa_init_transform_feedback_object(&brw_obj->base, name);
-
-   brw_obj->offset_bo =
-      brw_bo_alloc(brw->bufmgr, "transform feedback offsets", 16,
-                   BRW_MEMZONE_OTHER);
-   brw_obj->prim_count_bo =
-      brw_bo_alloc(brw->bufmgr, "xfb primitive counts", 16384,
-                   BRW_MEMZONE_OTHER);
-
-   return &brw_obj->base;
-}
-
-void
-brw_delete_transform_feedback(struct gl_context *ctx,
-                              struct gl_transform_feedback_object *obj)
-{
-   struct brw_transform_feedback_object *brw_obj =
-      (struct brw_transform_feedback_object *) obj;
-
-   brw_bo_unreference(brw_obj->offset_bo);
-   brw_bo_unreference(brw_obj->prim_count_bo);
-
-   _mesa_delete_transform_feedback_object(ctx, obj);
-}
-
-/**
- * Tally the number of primitives generated so far.
- *
- * The buffer contains a series of pairs:
- * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
- * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
- *
- * For each stream, we subtract the pair of values (end - start) to get the
- * number of primitives generated during one section. We accumulate these
- * values, adding them up to get the total number of primitives generated.
- *
- * Note that we expose one stream pre-Gfx7, so the above is just (start, end).
- */
-static void
-aggregate_transform_feedback_counter(
-   struct brw_context *brw,
-   struct brw_bo *bo,
-   struct brw_transform_feedback_counter *counter)
-{
-   const unsigned streams = brw->ctx.Const.MaxVertexStreams;
-
-   /* If the current batch is still contributing to the number of primitives
-    * generated, flush it now so the results will be present when mapped.
- */ - if (brw_batch_references(&brw->batch, bo)) - brw_batch_flush(brw); - - if (unlikely(brw->perf_debug && brw_bo_busy(bo))) - perf_debug("Stalling for # of transform feedback primitives written.\n"); - - uint64_t *prim_counts = brw_bo_map(brw, bo, MAP_READ); - prim_counts += counter->bo_start * streams; - - for (unsigned i = counter->bo_start; i + 1 < counter->bo_end; i += 2) { - for (unsigned s = 0; s < streams; s++) - counter->accum[s] += prim_counts[streams + s] - prim_counts[s]; - - prim_counts += 2 * streams; - } - - brw_bo_unmap(bo); - - /* We've already gathered up the old data; we can safely overwrite it now. */ - counter->bo_start = counter->bo_end = 0; -} - -/** - * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values) - * to prim_count_bo. - * - * If prim_count_bo is out of space, gather up the results so far into - * prims_generated[] and allocate a new buffer with enough space. - * - * The number of primitives written is used to compute the number of vertices - * written to a transform feedback stream, which is required to implement - * DrawTransformFeedback(). - */ -void -brw_save_primitives_written_counters(struct brw_context *brw, - struct brw_transform_feedback_object *obj) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct gl_context *ctx = &brw->ctx; - const int streams = ctx->Const.MaxVertexStreams; - - assert(obj->prim_count_bo != NULL); - - /* Check if there's enough space for a new pair of four values. */ - if ((obj->counter.bo_end + 2) * streams * sizeof(uint64_t) >= - obj->prim_count_bo->size) { - aggregate_transform_feedback_counter(brw, obj->prim_count_bo, - &obj->previous_counter); - aggregate_transform_feedback_counter(brw, obj->prim_count_bo, - &obj->counter); - } - - /* Flush any drawing so that the counters have the right values. */ - brw_emit_mi_flush(brw); - - /* Emit MI_STORE_REGISTER_MEM commands to write the values. */ - if (devinfo->ver >= 7) { - for (int i = 0; i < streams; i++) { - int offset = (streams * obj->counter.bo_end + i) * sizeof(uint64_t); - brw_store_register_mem64(brw, obj->prim_count_bo, - GFX7_SO_NUM_PRIMS_WRITTEN(i), - offset); - } - } else { - brw_store_register_mem64(brw, obj->prim_count_bo, - GFX6_SO_NUM_PRIMS_WRITTEN, - obj->counter.bo_end * sizeof(uint64_t)); - } - - /* Update where to write data to. */ - obj->counter.bo_end++; -} - -static void -compute_vertices_written_so_far(struct brw_context *brw, - struct brw_transform_feedback_object *obj, - struct brw_transform_feedback_counter *counter, - uint64_t *vertices_written) -{ - const struct gl_context *ctx = &brw->ctx; - unsigned vertices_per_prim = 0; - - switch (obj->primitive_mode) { - case GL_POINTS: - vertices_per_prim = 1; - break; - case GL_LINES: - vertices_per_prim = 2; - break; - case GL_TRIANGLES: - vertices_per_prim = 3; - break; - default: - unreachable("Invalid transform feedback primitive mode."); - } - - /* Get the number of primitives generated. */ - aggregate_transform_feedback_counter(brw, obj->prim_count_bo, counter); - - for (int i = 0; i < ctx->Const.MaxVertexStreams; i++) { - vertices_written[i] = vertices_per_prim * counter->accum[i]; - } -} - -/** - * Compute the number of vertices written by the last transform feedback - * begin/end block. 
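- *
- * The count is derived from the accumulated primitive counters: for example,
- * with GL_TRIANGLES as the feedback primitive mode, a stream whose counters
- * accumulated 12 generated primitives has written 3 * 12 = 36 vertices.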
- */ -static void -compute_xfb_vertices_written(struct brw_context *brw, - struct brw_transform_feedback_object *obj) -{ - if (obj->vertices_written_valid || !obj->base.EndedAnytime) - return; - - compute_vertices_written_so_far(brw, obj, &obj->previous_counter, - obj->vertices_written); - obj->vertices_written_valid = true; -} - -/** - * GetTransformFeedbackVertexCount() driver hook. - * - * Returns the number of vertices written to a particular stream by the last - * Begin/EndTransformFeedback block. Used to implement DrawTransformFeedback(). - */ -GLsizei -brw_get_transform_feedback_vertex_count(struct gl_context *ctx, - struct gl_transform_feedback_object *obj, - GLuint stream) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - assert(obj->EndedAnytime); - assert(stream < ctx->Const.MaxVertexStreams); - - compute_xfb_vertices_written(brw, brw_obj); - return brw_obj->vertices_written[stream]; -} - -void -brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - const struct gl_program *prog; - const struct gl_transform_feedback_info *linked_xfb_info; - struct gl_transform_feedback_object *xfb_obj = - ctx->TransformFeedback.CurrentObject; - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) xfb_obj; - - assert(brw->screen->devinfo.ver == 6); - - if (ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]) { - /* BRW_NEW_GEOMETRY_PROGRAM */ - prog = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; - } else { - /* BRW_NEW_VERTEX_PROGRAM */ - prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; - } - linked_xfb_info = prog->sh.LinkedTransformFeedback; - - /* Compute the maximum number of vertices that we can write without - * overflowing any of the buffers currently being used for feedback. - */ - brw_obj->max_index - = _mesa_compute_max_transform_feedback_vertices(ctx, xfb_obj, - linked_xfb_info); - - /* Initialize the SVBI 0 register to zero and set the maximum index. */ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); - OUT_BATCH(0); /* SVBI 0 */ - OUT_BATCH(0); /* starting index */ - OUT_BATCH(brw_obj->max_index); - ADVANCE_BATCH(); - - /* Initialize the rest of the unused streams to sane values. Otherwise, - * they may indicate that there is no room to write data and prevent - * anything from happening at all. - */ - for (int i = 1; i < 4; i++) { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); - OUT_BATCH(i << SVB_INDEX_SHIFT); - OUT_BATCH(0); /* starting index */ - OUT_BATCH(0xffffffff); - ADVANCE_BATCH(); - } - - /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */ - brw_save_primitives_written_counters(brw, brw_obj); - - brw_obj->primitive_mode = mode; -} - -void -brw_end_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */ - if (!obj->Paused) - brw_save_primitives_written_counters(brw, brw_obj); - - /* We've reached the end of a transform feedback begin/end block. 
This - * means that future DrawTransformFeedback() calls will need to pick up the - * results of the current counter, and that it's time to roll back the - * current primitive counter to zero. - */ - brw_obj->previous_counter = brw_obj->counter; - brw_reset_transform_feedback_counter(&brw_obj->counter); - - /* EndTransformFeedback() means that we need to update the number of - * vertices written. Since it's only necessary if DrawTransformFeedback() - * is called and it means mapping a buffer object, we delay computing it - * until it's absolutely necessary to try and avoid stalls. - */ - brw_obj->vertices_written_valid = false; -} - -void -brw_pause_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters. - * While this operation is paused, other transform feedback actions may - * occur, which will contribute to the counters. We need to exclude that - * from our counts. - */ - brw_save_primitives_written_counters(brw, brw_obj); -} - -void -brw_resume_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - /* Reload SVBI 0 with the count of vertices written so far. */ - uint64_t svbi; - compute_vertices_written_so_far(brw, brw_obj, &brw_obj->counter, &svbi); - - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); - OUT_BATCH(0); /* SVBI 0 */ - OUT_BATCH((uint32_t) svbi); /* starting index */ - OUT_BATCH(brw_obj->max_index); - ADVANCE_BATCH(); - - /* Initialize the rest of the unused streams to sane values. Otherwise, - * they may indicate that there is no room to write data and prevent - * anything from happening at all. - */ - for (int i = 1; i < 4; i++) { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); - OUT_BATCH(i << SVB_INDEX_SHIFT); - OUT_BATCH(0); /* starting index */ - OUT_BATCH(0xffffffff); - ADVANCE_BATCH(); - } - - /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */ - brw_save_primitives_written_counters(brw, brw_obj); -} diff --git a/src/mesa/drivers/dri/i965/gfx6_urb.c b/src/mesa/drivers/dri/i965/gfx6_urb.c deleted file mode 100644 index 8b69409..0000000 --- a/src/mesa/drivers/dri/i965/gfx6_urb.c +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright © 2009 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * - */ - -#include "main/macros.h" -#include "brw_batch.h" -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" - -/** - * When the GS is not in use, we assign the entire URB space to the VS. When - * the GS is in use, we split the URB space evenly between the VS and the GS. - * This is not ideal, but it's simple. - * - * URB size / 2 URB size / 2 - * _____________-______________ _____________-______________ - * / \ / \ - * +-------------------------------------------------------------+ - * | Vertex Shader Entries | Geometry Shader Entries | - * +-------------------------------------------------------------+ - * - * Sandybridge GT1 has 32kB of URB space, while GT2 has 64kB. - * (See the Sandybridge PRM, Volume 2, Part 1, Section 1.4.7: 3DSTATE_URB.) - */ -void -gfx6_upload_urb(struct brw_context *brw, unsigned vs_size, - bool gs_present, unsigned gs_size) -{ - int nr_vs_entries, nr_gs_entries; - int total_urb_size = brw->urb.size * 1024; /* in bytes */ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* Calculate how many entries fit in each stage's section of the URB */ - if (gs_present) { - nr_vs_entries = (total_urb_size/2) / (vs_size * 128); - nr_gs_entries = (total_urb_size/2) / (gs_size * 128); - } else { - nr_vs_entries = total_urb_size / (vs_size * 128); - nr_gs_entries = 0; - } - - /* Then clamp to the maximum allowed by the hardware */ - if (nr_vs_entries > devinfo->urb.max_entries[MESA_SHADER_VERTEX]) - nr_vs_entries = devinfo->urb.max_entries[MESA_SHADER_VERTEX]; - - if (nr_gs_entries > devinfo->urb.max_entries[MESA_SHADER_GEOMETRY]) - nr_gs_entries = devinfo->urb.max_entries[MESA_SHADER_GEOMETRY]; - - /* Finally, both must be a multiple of 4 (see 3DSTATE_URB in the PRM). */ - brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4); - brw->urb.nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, 4); - - assert(brw->urb.nr_vs_entries >= - devinfo->urb.min_entries[MESA_SHADER_VERTEX]); - assert(brw->urb.nr_vs_entries % 4 == 0); - assert(brw->urb.nr_gs_entries % 4 == 0); - assert(vs_size <= 5); - assert(gs_size <= 5); - - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); - OUT_BATCH(((vs_size - 1) << GFX6_URB_VS_SIZE_SHIFT) | - ((brw->urb.nr_vs_entries) << GFX6_URB_VS_ENTRIES_SHIFT)); - OUT_BATCH(((gs_size - 1) << GFX6_URB_GS_SIZE_SHIFT) | - ((brw->urb.nr_gs_entries) << GFX6_URB_GS_ENTRIES_SHIFT)); - ADVANCE_BATCH(); - - /* From the PRM Volume 2 part 1, section 1.4.7: - * - * Because of a urb corruption caused by allocating a previous gsunit’s - * urb entry to vsunit software is required to send a "GS NULL - * Fence"(Send URB fence with VS URB size == 1 and GS URB size == 0) plus - * a dummy DRAW call before any case where VS will be taking over GS URB - * space. - * - * It is not clear exactly what this means ("URB fence" is a command that - * doesn't exist on Gfx6). So for now we just do a full pipeline flush as - * a workaround. 
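-    * The dangerous transition is precisely a draw with the GS enabled
-    * followed by one with it disabled, which hands the GS half of the URB
-    * back to the VS; hence the gs_present tracking below.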
-    */
-   if (brw->urb.gs_present && !gs_present)
-      brw_emit_mi_flush(brw);
-   brw->urb.gs_present = gs_present;
-}
-
-static void
-upload_urb(struct brw_context *brw)
-{
-   /* BRW_NEW_VS_PROG_DATA */
-   const struct brw_vue_prog_data *vs_vue_prog_data =
-      brw_vue_prog_data(brw->vs.base.prog_data);
-   const unsigned vs_size = MAX2(vs_vue_prog_data->urb_entry_size, 1);
-
-   /* BRW_NEW_GEOMETRY_PROGRAM, BRW_NEW_GS_PROG_DATA */
-   const bool gs_present =
-      brw->ff_gs.prog_active || brw->programs[MESA_SHADER_GEOMETRY];
-
-   /* When using GS to do transform feedback only, we use the same VUE layout
-    * for VS outputs and GS outputs (as it's what the SF and Clipper expect),
-    * so we can simply make the GS URB entry size the same as for the VS.
-    * This may technically be too large in cases where we have few vertex
-    * attributes and a lot of varyings, since the VS size is determined by the
-    * larger of the two. For now, it's safe.
-    *
-    * For a user-provided GS the assumption above does not hold, since the GS
-    * outputs can be different from the VS outputs.
-    */
-   unsigned gs_size = vs_size;
-   if (brw->programs[MESA_SHADER_GEOMETRY]) {
-      const struct brw_vue_prog_data *gs_vue_prog_data =
-         brw_vue_prog_data(brw->gs.base.prog_data);
-      gs_size = gs_vue_prog_data->urb_entry_size;
-      assert(gs_size >= 1);
-   }
-
-   gfx6_upload_urb(brw, vs_size, gs_present, gs_size);
-}
-
-const struct brw_tracked_state gfx6_urb = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BLORP |
-             BRW_NEW_CONTEXT |
-             BRW_NEW_FF_GS_PROG_DATA |
-             BRW_NEW_GEOMETRY_PROGRAM |
-             BRW_NEW_GS_PROG_DATA |
-             BRW_NEW_VS_PROG_DATA,
-   },
-   .emit = upload_urb,
-};
diff --git a/src/mesa/drivers/dri/i965/gfx7_l3_state.c b/src/mesa/drivers/dri/i965/gfx7_l3_state.c
deleted file mode 100644
index c088dc6..0000000
--- a/src/mesa/drivers/dri/i965/gfx7_l3_state.c
+++ /dev/null
@@ -1,312 +0,0 @@
-/*
- * Copyright (c) 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "common/intel_l3_config.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "brw_batch.h"
-
-/**
- * Calculate the desired L3 partitioning based on the current state of the
- * pipeline. For now this simply returns the conservative defaults calculated
- * by get_default_l3_weights(), but we could probably do better by gathering
- * more statistics from the pipeline state (e.g.
guess of expected URB usage - * and bound surfaces), or by using feed-back from performance counters. - */ -static struct intel_l3_weights -get_pipeline_state_l3_weights(const struct brw_context *brw) -{ - const struct brw_stage_state *stage_states[] = { - [MESA_SHADER_VERTEX] = &brw->vs.base, - [MESA_SHADER_TESS_CTRL] = &brw->tcs.base, - [MESA_SHADER_TESS_EVAL] = &brw->tes.base, - [MESA_SHADER_GEOMETRY] = &brw->gs.base, - [MESA_SHADER_FRAGMENT] = &brw->wm.base, - [MESA_SHADER_COMPUTE] = &brw->cs.base - }; - bool needs_dc = false, needs_slm = false; - - for (unsigned i = 0; i < ARRAY_SIZE(stage_states); i++) { - const struct gl_program *prog = - brw->ctx._Shader->CurrentProgram[stage_states[i]->stage]; - const struct brw_stage_prog_data *prog_data = stage_states[i]->prog_data; - - needs_dc |= (prog && (prog->sh.data->NumAtomicBuffers || - prog->sh.data->NumShaderStorageBlocks || - prog->info.num_images)) || - (prog_data && prog_data->total_scratch); - needs_slm |= prog_data && prog_data->total_shared; - } - - return intel_get_default_l3_weights(&brw->screen->devinfo, - needs_dc, needs_slm); -} - -/** - * Program the hardware to use the specified L3 configuration. - */ -static void -setup_l3_config(struct brw_context *brw, const struct intel_l3_config *cfg) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const bool has_dc = cfg->n[INTEL_L3P_DC] || cfg->n[INTEL_L3P_ALL]; - const bool has_is = cfg->n[INTEL_L3P_IS] || cfg->n[INTEL_L3P_RO] || - cfg->n[INTEL_L3P_ALL]; - const bool has_c = cfg->n[INTEL_L3P_C] || cfg->n[INTEL_L3P_RO] || - cfg->n[INTEL_L3P_ALL]; - const bool has_t = cfg->n[INTEL_L3P_T] || cfg->n[INTEL_L3P_RO] || - cfg->n[INTEL_L3P_ALL]; - const bool has_slm = cfg->n[INTEL_L3P_SLM]; - - /* According to the hardware docs, the L3 partitioning can only be changed - * while the pipeline is completely drained and the caches are flushed, - * which involves a first PIPE_CONTROL flush which stalls the pipeline... - */ - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_DATA_CACHE_FLUSH | - PIPE_CONTROL_CS_STALL); - - /* ...followed by a second pipelined PIPE_CONTROL that initiates - * invalidation of the relevant caches. Note that because RO invalidation - * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL - * command is processed by the CS) we cannot combine it with the previous - * stalling flush as the hardware documentation suggests, because that - * would cause the CS to stall on previous rendering *after* RO - * invalidation and wouldn't prevent the RO caches from being polluted by - * concurrent rendering before the stall completes. This intentionally - * doesn't implement the SKL+ hardware workaround suggesting to enable CS - * stall on PIPE_CONTROLs with the texture cache invalidation bit set for - * GPGPU workloads because the previous and subsequent PIPE_CONTROLs - * already guarantee that there is no concurrent GPGPU kernel execution - * (see SKL HSD 2132585). - */ - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_CONST_CACHE_INVALIDATE | - PIPE_CONTROL_INSTRUCTION_INVALIDATE | - PIPE_CONTROL_STATE_CACHE_INVALIDATE); - - /* Now send a third stalling flush to make sure that invalidation is - * complete when the L3 configuration registers are modified. 
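-    * The full sequence is therefore: (1) a stalling flush of the dirty R/W
-    * caches, (2) a pipelined invalidation of the RO caches, and (3) a
-    * stalling flush to fence the invalidation against the register writes
-    * below.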
- */ - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_DATA_CACHE_FLUSH | - PIPE_CONTROL_CS_STALL); - - if (devinfo->ver >= 8) { - assert(!cfg->n[INTEL_L3P_IS] && !cfg->n[INTEL_L3P_C] && !cfg->n[INTEL_L3P_T]); - - const unsigned imm_data = ( - (devinfo->ver < 11 && has_slm ? GFX8_L3CNTLREG_SLM_ENABLE : 0) | - (devinfo->ver == 11 ? GFX11_L3CNTLREG_USE_FULL_WAYS : 0) | - SET_FIELD(cfg->n[INTEL_L3P_URB], GFX8_L3CNTLREG_URB_ALLOC) | - SET_FIELD(cfg->n[INTEL_L3P_RO], GFX8_L3CNTLREG_RO_ALLOC) | - SET_FIELD(cfg->n[INTEL_L3P_DC], GFX8_L3CNTLREG_DC_ALLOC) | - SET_FIELD(cfg->n[INTEL_L3P_ALL], GFX8_L3CNTLREG_ALL_ALLOC)); - - /* Set up the L3 partitioning. */ - brw_load_register_imm32(brw, GFX8_L3CNTLREG, imm_data); - } else { - assert(!cfg->n[INTEL_L3P_ALL]); - - /* When enabled SLM only uses a portion of the L3 on half of the banks, - * the matching space on the remaining banks has to be allocated to a - * client (URB for all validated configurations) set to the - * lower-bandwidth 2-bank address hashing mode. - */ - const bool urb_low_bw = has_slm && devinfo->platform != INTEL_PLATFORM_BYT; - assert(!urb_low_bw || cfg->n[INTEL_L3P_URB] == cfg->n[INTEL_L3P_SLM]); - - /* Minimum number of ways that can be allocated to the URB. */ - const unsigned n0_urb = (devinfo->platform == INTEL_PLATFORM_BYT ? 32 : 0); - assert(cfg->n[INTEL_L3P_URB] >= n0_urb); - - BEGIN_BATCH(7); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (7 - 2)); - - /* Demote any clients with no ways assigned to LLC. */ - OUT_BATCH(GFX7_L3SQCREG1); - OUT_BATCH((devinfo->platform == INTEL_PLATFORM_HSW ? HSW_L3SQCREG1_SQGHPCI_DEFAULT : - devinfo->platform == INTEL_PLATFORM_BYT ? VLV_L3SQCREG1_SQGHPCI_DEFAULT : - IVB_L3SQCREG1_SQGHPCI_DEFAULT) | - (has_dc ? 0 : GFX7_L3SQCREG1_CONV_DC_UC) | - (has_is ? 0 : GFX7_L3SQCREG1_CONV_IS_UC) | - (has_c ? 0 : GFX7_L3SQCREG1_CONV_C_UC) | - (has_t ? 0 : GFX7_L3SQCREG1_CONV_T_UC)); - - /* Set up the L3 partitioning. */ - OUT_BATCH(GFX7_L3CNTLREG2); - OUT_BATCH((has_slm ? GFX7_L3CNTLREG2_SLM_ENABLE : 0) | - SET_FIELD(cfg->n[INTEL_L3P_URB] - n0_urb, GFX7_L3CNTLREG2_URB_ALLOC) | - (urb_low_bw ? GFX7_L3CNTLREG2_URB_LOW_BW : 0) | - SET_FIELD(cfg->n[INTEL_L3P_ALL], GFX7_L3CNTLREG2_ALL_ALLOC) | - SET_FIELD(cfg->n[INTEL_L3P_RO], GFX7_L3CNTLREG2_RO_ALLOC) | - SET_FIELD(cfg->n[INTEL_L3P_DC], GFX7_L3CNTLREG2_DC_ALLOC)); - OUT_BATCH(GFX7_L3CNTLREG3); - OUT_BATCH(SET_FIELD(cfg->n[INTEL_L3P_IS], GFX7_L3CNTLREG3_IS_ALLOC) | - SET_FIELD(cfg->n[INTEL_L3P_C], GFX7_L3CNTLREG3_C_ALLOC) | - SET_FIELD(cfg->n[INTEL_L3P_T], GFX7_L3CNTLREG3_T_ALLOC)); - - ADVANCE_BATCH(); - - if (can_do_hsw_l3_atomics(brw->screen)) { - /* Enable L3 atomics on HSW if we have a DC partition, otherwise keep - * them disabled to avoid crashing the system hard. - */ - BEGIN_BATCH(5); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (5 - 2)); - OUT_BATCH(HSW_SCRATCH1); - OUT_BATCH(has_dc ? 0 : HSW_SCRATCH1_L3_ATOMIC_DISABLE); - OUT_BATCH(HSW_ROW_CHICKEN3); - OUT_BATCH(REG_MASK(HSW_ROW_CHICKEN3_L3_ATOMIC_DISABLE) | - (has_dc ? 0 : HSW_ROW_CHICKEN3_L3_ATOMIC_DISABLE)); - ADVANCE_BATCH(); - } - } -} - -/** - * Update the URB size in the context state for the specified L3 - * configuration. 
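- *
- * The new size feeds the entry-count math in gfx7_upload_urb(), so this
- * also zeroes the cached per-stage entry sizes to force the 3DSTATE_URB_*
- * packets to be re-emitted.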
- */
-static void
-update_urb_size(struct brw_context *brw, const struct intel_l3_config *cfg)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-   const unsigned sz = intel_get_l3_config_urb_size(devinfo, cfg);
-
-   if (brw->urb.size != sz) {
-      brw->urb.size = sz;
-      brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
-
-      /* If we change the total URB size, reset the individual stage sizes to
-       * zero so that, even if the per-stage entry sizes are unchanged,
-       * gfx7_upload_urb still re-emits 3DSTATE_URB_*.
-       */
-      brw->urb.vsize = 0;
-      brw->urb.gsize = 0;
-      brw->urb.hsize = 0;
-      brw->urb.dsize = 0;
-   }
-}
-
-void
-brw_emit_l3_state(struct brw_context *brw)
-{
-   const struct intel_l3_weights w = get_pipeline_state_l3_weights(brw);
-   const float dw = intel_diff_l3_weights(w, intel_get_l3_config_weights(brw->l3.config));
-   /* The distance between any two compatible weight vectors cannot exceed two
-    * due to the triangle inequality.
-    */
-   const float large_dw_threshold = 2.0;
-   /* Somewhat arbitrary, simply makes sure that there will be no repeated
-    * transitions to the same L3 configuration, could probably do better here.
-    */
-   const float small_dw_threshold = 0.5;
-   /* If we're emitting a new batch the caches should already be clean and the
-    * transition should be relatively cheap, so it shouldn't hurt much to use
-    * the smaller threshold. Otherwise use the larger threshold so that we
-    * only reprogram the L3 mid-batch if the most recently programmed
-    * configuration is incompatible with the current pipeline state.
-    */
-   const float dw_threshold = (brw->ctx.NewDriverState & BRW_NEW_BATCH ?
-                               small_dw_threshold : large_dw_threshold);
-
-   if (dw > dw_threshold && can_do_pipelined_register_writes(brw->screen)) {
-      const struct intel_l3_config *const cfg =
-         intel_get_l3_config(&brw->screen->devinfo, w);
-
-      setup_l3_config(brw, cfg);
-      update_urb_size(brw, cfg);
-      brw->l3.config = cfg;
-
-      if (INTEL_DEBUG(DEBUG_L3)) {
-         fprintf(stderr, "L3 config transition (%f > %f): ", dw, dw_threshold);
-         intel_dump_l3_config(cfg, stderr);
-      }
-   }
-}
-
-const struct brw_tracked_state gfx7_l3_state = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_CS_PROG_DATA |
-             BRW_NEW_FS_PROG_DATA |
-             BRW_NEW_GS_PROG_DATA |
-             BRW_NEW_TCS_PROG_DATA |
-             BRW_NEW_TES_PROG_DATA |
-             BRW_NEW_VS_PROG_DATA,
-   },
-   .emit = brw_emit_l3_state
-};
-
-/**
- * Hack to restore the default L3 configuration.
- *
- * This will be called at the end of every batch in order to reset the L3
- * configuration to the default values for the time being until the kernel is
- * fixed. Until kernel commit 6702cf16e0ba8b0129f5aa1b6609d4e9c70bc13b
- * (included in v4.1) we would set the MI_RESTORE_INHIBIT bit when submitting
- * batch buffers for the default context used by the DDX, which meant that any
- * context state changed by the GL would leak into the DDX, the assumption
- * being that the DDX would initialize any state it cares about manually. The
- * DDX is however not careful enough to program an L3 configuration
- * explicitly, and it makes assumptions about it (URB size) which won't hold
- * and cause it to misrender if we let our L3 set-up leak into the DDX.
- *
- * Since v4.1 of the Linux kernel the default context is saved and restored
- * normally, so it's far less likely for our L3 programming to interfere with
- * other contexts -- In fact restoring the default L3 configuration at the end
- * of the batch will be redundant most of the time.
A kind of state leak is - * still possible though if the context making assumptions about L3 state is - * created immediately after our context was active (e.g. without the DDX - * default context being scheduled in between) because at present the DRM - * doesn't fully initialize the contents of newly created contexts and instead - * sets the MI_RESTORE_INHIBIT flag causing it to inherit the state from the - * last active context. - * - * It's possible to realize such a scenario if, say, an X server (or a GL - * application using an outdated non-L3-aware Mesa version) is started while - * another GL application is running and happens to have modified the L3 - * configuration, or if no X server is running at all and a GL application - * using a non-L3-aware Mesa version is started after another GL application - * ran and modified the L3 configuration -- The latter situation can actually - * be reproduced easily on IVB in our CI system. - */ -void -gfx7_restore_default_l3_config(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - const struct intel_l3_config *const cfg = intel_get_default_l3_config(devinfo); - - if (cfg != brw->l3.config && - can_do_pipelined_register_writes(brw->screen)) { - setup_l3_config(brw, cfg); - update_urb_size(brw, cfg); - brw->l3.config = cfg; - } -} diff --git a/src/mesa/drivers/dri/i965/gfx7_sol_state.c b/src/mesa/drivers/dri/i965/gfx7_sol_state.c deleted file mode 100644 index d1a2e5b..0000000 --- a/src/mesa/drivers/dri/i965/gfx7_sol_state.c +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * @file gfx7_sol_state.c - * - * Controls the stream output logic (SOL) stage of the gfx7 hardware, which is - * used to implement GL_EXT_transform_feedback. - */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_batch.h" -#include "brw_buffer_objects.h" -#include "main/transformfeedback.h" - -void -gfx7_begin_transform_feedback(struct gl_context *ctx, GLenum mode, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - assert(brw->screen->devinfo.ver == 7); - - /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. 
*/ - brw_save_primitives_written_counters(brw, brw_obj); - - /* Reset the SO buffer offsets to 0. */ - if (!can_do_pipelined_register_writes(brw->screen)) { - brw_batch_flush(brw); - brw->batch.needs_sol_reset = true; - } else { - for (int i = 0; i < 4; i++) { - brw_load_register_imm32(brw, GFX7_SO_WRITE_OFFSET(i), 0); - } - } - - brw_obj->primitive_mode = mode; -} - -void -gfx7_end_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - /* After EndTransformFeedback, it's likely that the client program will try - * to draw using the contents of the transform feedback buffer as vertex - * input. In order for this to work, we need to flush the data through at - * least the GS stage of the pipeline, and flush out the render cache. For - * simplicity, just do a full flush. - */ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */ - if (!obj->Paused) - brw_save_primitives_written_counters(brw, brw_obj); - - /* We've reached the end of a transform feedback begin/end block. This - * means that future DrawTransformFeedback() calls will need to pick up the - * results of the current counter, and that it's time to roll back the - * current primitive counter to zero. - */ - brw_obj->previous_counter = brw_obj->counter; - brw_reset_transform_feedback_counter(&brw_obj->counter); - - /* EndTransformFeedback() means that we need to update the number of - * vertices written. Since it's only necessary if DrawTransformFeedback() - * is called and it means mapping a buffer object, we delay computing it - * until it's absolutely necessary to try and avoid stalls. - */ - brw_obj->vertices_written_valid = false; -} - -void -gfx7_pause_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - /* Flush any drawing so that the counters have the right values. */ - brw_emit_mi_flush(brw); - - assert(brw->screen->devinfo.ver == 7); - - /* Save the SOL buffer offset register values. */ - for (int i = 0; i < 4; i++) { - BEGIN_BATCH(3); - OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); - OUT_BATCH(GFX7_SO_WRITE_OFFSET(i)); - OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t)); - ADVANCE_BATCH(); - } - - /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters. - * While this operation is paused, other transform feedback actions may - * occur, which will contribute to the counters. We need to exclude that - * from our counts. - */ - brw_save_primitives_written_counters(brw, brw_obj); -} - -void -gfx7_resume_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - assert(brw->screen->devinfo.ver == 7); - - /* Reload the SOL buffer offset registers. */ - for (int i = 0; i < 4; i++) { - BEGIN_BATCH(3); - OUT_BATCH(GFX7_MI_LOAD_REGISTER_MEM | (3 - 2)); - OUT_BATCH(GFX7_SO_WRITE_OFFSET(i)); - OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t)); - ADVANCE_BATCH(); - } - - /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. 
*/ - brw_save_primitives_written_counters(brw, brw_obj); -} diff --git a/src/mesa/drivers/dri/i965/gfx7_urb.c b/src/mesa/drivers/dri/i965/gfx7_urb.c deleted file mode 100644 index ff00108..0000000 --- a/src/mesa/drivers/dri/i965/gfx7_urb.c +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "main/macros.h" -#include "brw_batch.h" -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" - -#include "common/intel_l3_config.h" - -/** - * The following diagram shows how we partition the URB: - * - * 16kB or 32kB Rest of the URB space - * __________-__________ _________________-_________________ - * / \ / \ - * +-------------------------------------------------------------+ - * | VS/HS/DS/GS/FS Push | VS/HS/DS/GS URB | - * | Constants | Entries | - * +-------------------------------------------------------------+ - * - * Notably, push constants must be stored at the beginning of the URB - * space, while entries can be stored anywhere. Ivybridge and Haswell - * GT1/GT2 have a maximum constant buffer size of 16kB, while Haswell GT3 - * doubles this (32kB). - * - * Ivybridge and Haswell GT1/GT2 allow push constants to be located (and - * sized) in increments of 1kB. Haswell GT3 requires them to be located and - * sized in increments of 2kB. - * - * Currently we split the constant buffer space evenly among whatever stages - * are active. This is probably not ideal, but simple. - * - * Ivybridge GT1 and Haswell GT1 have 128kB of URB space. - * Ivybridge GT2 and Haswell GT2 have 256kB of URB space. - * Haswell GT3 has 512kB of URB space. - * - * See "Volume 2a: 3D Pipeline," section 1.8, "Volume 1b: Configurations", - * and the documentation for 3DSTATE_PUSH_CONSTANT_ALLOC_xS. - */ -static void -gfx7_allocate_push_constants(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* BRW_NEW_GEOMETRY_PROGRAM */ - bool gs_present = brw->programs[MESA_SHADER_GEOMETRY]; - - /* BRW_NEW_TESS_PROGRAMS */ - bool tess_present = brw->programs[MESA_SHADER_TESS_EVAL]; - - unsigned avail_size = 16; - unsigned multiplier = devinfo->max_constant_urb_size_kb / 16; - - int stages = 2 + gs_present + 2 * tess_present; - - /* Divide up the available space equally between stages. Because we - * round down (using floor division), there may be some left over - * space. We allocate that to the pixel shader stage. 
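-    * For example, with both tessellation and a GS enabled, stages == 5, so
-    * each stage gets 16 / 5 == 3kB and the FS receives the remaining
-    * 16 - 3 * 4 == 4kB; the Haswell GT3 multiplier of 2 scales these to 6kB
-    * and 8kB.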
- */ - unsigned size_per_stage = avail_size / stages; - - unsigned vs_size = size_per_stage; - unsigned hs_size = tess_present ? size_per_stage : 0; - unsigned ds_size = tess_present ? size_per_stage : 0; - unsigned gs_size = gs_present ? size_per_stage : 0; - unsigned fs_size = avail_size - size_per_stage * (stages - 1); - - gfx7_emit_push_constant_state(brw, multiplier * vs_size, - multiplier * hs_size, multiplier * ds_size, - multiplier * gs_size, multiplier * fs_size); - - /* From p115 of the Ivy Bridge PRM (3.2.1.4 3DSTATE_PUSH_CONSTANT_ALLOC_VS): - * - * Programming Restriction: - * - * The 3DSTATE_CONSTANT_VS must be reprogrammed prior to the next - * 3DPRIMITIVE command after programming the - * 3DSTATE_PUSH_CONSTANT_ALLOC_VS. - * - * Similar text exists for the other 3DSTATE_PUSH_CONSTANT_ALLOC_* - * commands. - */ - brw->vs.base.push_constants_dirty = true; - brw->tcs.base.push_constants_dirty = true; - brw->tes.base.push_constants_dirty = true; - brw->gs.base.push_constants_dirty = true; - brw->wm.base.push_constants_dirty = true; -} - -void -gfx7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size, - unsigned hs_size, unsigned ds_size, - unsigned gs_size, unsigned fs_size) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - unsigned offset = 0; - - /* From the SKL PRM, Workarounds section (#878): - * - * Push constant buffer corruption possible. WA: Insert 2 zero-length - * PushConst_PS before every intended PushConst_PS update, issue a - * NULLPRIM after each of the zero len PC update to make sure CS commits - * them. - * - * This workaround is attempting to solve a pixel shader push constant - * synchronization issue. - * - * There's an unpublished WA that involves re-emitting - * 3DSTATE_PUSH_CONSTANT_ALLOC_PS for every 500-ish 3DSTATE_CONSTANT_PS - * packets. Since our counting methods may not be reliable due to - * context-switching and pre-emption, we instead choose to approximate this - * behavior by re-emitting the packet at the top of the batch. - */ - if (brw->ctx.NewDriverState == BRW_NEW_BATCH) { - /* SKL GT2 and GLK 2x6 have reliably demonstrated this issue thus far. - * We've also seen some intermittent failures from SKL GT4 and BXT in - * the past. - */ - if (devinfo->platform != INTEL_PLATFORM_SKL && - devinfo->platform != INTEL_PLATFORM_BXT && - devinfo->platform != INTEL_PLATFORM_GLK) - return; - } - - BEGIN_BATCH(10); - OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2)); - OUT_BATCH(vs_size | offset << GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); - offset += vs_size; - - OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_HS << 16 | (2 - 2)); - OUT_BATCH(hs_size | offset << GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); - offset += hs_size; - - OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_DS << 16 | (2 - 2)); - OUT_BATCH(ds_size | offset << GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); - offset += ds_size; - - OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_GS << 16 | (2 - 2)); - OUT_BATCH(gs_size | offset << GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); - offset += gs_size; - - OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2)); - OUT_BATCH(fs_size | offset << GFX7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); - ADVANCE_BATCH(); - - /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS): - * - * A PIPE_CONTROL command with the CS Stall bit set must be programmed - * in the ring after this instruction. - * - * No such restriction exists for Haswell or Baytrail. 
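-    * That is why the workaround below is applied only on Ivybridge proper
-    * (verx10 <= 70 and not Baytrail).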
- */ - if (devinfo->verx10 <= 70 && devinfo->platform != INTEL_PLATFORM_BYT) - gfx7_emit_cs_stall_flush(brw); -} - -const struct brw_tracked_state gfx7_push_constant_space = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_CONTEXT | - BRW_NEW_BATCH | /* Push constant workaround */ - BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_TESS_PROGRAMS, - }, - .emit = gfx7_allocate_push_constants, -}; - -static void -upload_urb(struct brw_context *brw) -{ - /* BRW_NEW_VS_PROG_DATA */ - const struct brw_vue_prog_data *vs_vue_prog_data = - brw_vue_prog_data(brw->vs.base.prog_data); - const unsigned vs_size = MAX2(vs_vue_prog_data->urb_entry_size, 1); - /* BRW_NEW_GS_PROG_DATA */ - const bool gs_present = brw->gs.base.prog_data; - /* BRW_NEW_TES_PROG_DATA */ - const bool tess_present = brw->tes.base.prog_data; - - gfx7_upload_urb(brw, vs_size, gs_present, tess_present); -} - -void -gfx7_upload_urb(struct brw_context *brw, unsigned vs_size, - bool gs_present, bool tess_present) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - /* BRW_NEW_{VS,TCS,TES,GS}_PROG_DATA */ - struct brw_vue_prog_data *prog_data[4] = { - [MESA_SHADER_VERTEX] = - brw_vue_prog_data(brw->vs.base.prog_data), - [MESA_SHADER_TESS_CTRL] = - tess_present ? brw_vue_prog_data(brw->tcs.base.prog_data) : NULL, - [MESA_SHADER_TESS_EVAL] = - tess_present ? brw_vue_prog_data(brw->tes.base.prog_data) : NULL, - [MESA_SHADER_GEOMETRY] = - gs_present ? brw_vue_prog_data(brw->gs.base.prog_data) : NULL, - }; - - unsigned entry_size[4]; - entry_size[MESA_SHADER_VERTEX] = vs_size; - for (int i = MESA_SHADER_TESS_CTRL; i <= MESA_SHADER_GEOMETRY; i++) { - entry_size[i] = prog_data[i] ? prog_data[i]->urb_entry_size : 1; - } - - /* If we're just switching between programs with the same URB requirements, - * skip the rest of the logic. 
-    */
-   if (brw->urb.vsize == entry_size[MESA_SHADER_VERTEX] &&
-       brw->urb.gs_present == gs_present &&
-       brw->urb.gsize == entry_size[MESA_SHADER_GEOMETRY] &&
-       brw->urb.tess_present == tess_present &&
-       brw->urb.hsize == entry_size[MESA_SHADER_TESS_CTRL] &&
-       brw->urb.dsize == entry_size[MESA_SHADER_TESS_EVAL]) {
-      return;
-   }
-   brw->urb.vsize = entry_size[MESA_SHADER_VERTEX];
-   brw->urb.gs_present = gs_present;
-   brw->urb.gsize = entry_size[MESA_SHADER_GEOMETRY];
-   brw->urb.tess_present = tess_present;
-   brw->urb.hsize = entry_size[MESA_SHADER_TESS_CTRL];
-   brw->urb.dsize = entry_size[MESA_SHADER_TESS_EVAL];
-
-   unsigned entries[4];
-   unsigned start[4];
-   bool constrained;
-   intel_get_urb_config(devinfo, brw->l3.config,
-                        tess_present, gs_present, entry_size,
-                        entries, start, NULL, &constrained);
-
-   if (devinfo->platform == INTEL_PLATFORM_IVB)
-      gfx7_emit_vs_workaround_flush(brw);
-
-   BEGIN_BATCH(8);
-   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
-      assert(devinfo->ver != 10 || entry_size[i] % 3);
-      OUT_BATCH((_3DSTATE_URB_VS + i) << 16 | (2 - 2));
-      OUT_BATCH(entries[i] |
-                ((entry_size[i] - 1) << GFX7_URB_ENTRY_SIZE_SHIFT) |
-                (start[i] << GFX7_URB_STARTING_ADDRESS_SHIFT));
-   }
-   ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state gfx7_urb = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BLORP |
-             BRW_NEW_CONTEXT |
-             BRW_NEW_URB_SIZE |
-             BRW_NEW_GS_PROG_DATA |
-             BRW_NEW_TCS_PROG_DATA |
-             BRW_NEW_TES_PROG_DATA |
-             BRW_NEW_VS_PROG_DATA,
-   },
-   .emit = upload_urb,
-};
diff --git a/src/mesa/drivers/dri/i965/gfx8_depth_state.c b/src/mesa/drivers/dri/i965/gfx8_depth_state.c
deleted file mode 100644
index e041355..0000000
--- a/src/mesa/drivers/dri/i965/gfx8_depth_state.c
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "brw_batch.h"
-#include "brw_mipmap_tree.h"
-#include "brw_fbo.h"
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "compiler/brw_eu_defines.h"
-#include "brw_wm.h"
-#include "main/framebuffer.h"
-
-/**
- * Should we set the PMA FIX ENABLE bit?
- *
- * To avoid unnecessary depth-related stalls, we need to set this bit.
- * However, there is a very complicated formula which governs when it
- * is legal to do so. This function computes that.
- *
- * See the documentation for the CACHE_MODE_1 register, bit 11.
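- *
- * Roughly: the fix may only be enabled while HiZ and depth testing are on,
- * and the pixel shader either computes depth itself or can kill pixels
- * while depth or stencil writes are enabled. The term-by-term breakdown
- * below follows that formula.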
- */ -static bool -pma_fix_enable(const struct brw_context *brw) -{ - const struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *wm_prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - /* _NEW_BUFFERS */ - struct brw_renderbuffer *depth_irb = - brw_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); - - /* 3DSTATE_WM::ForceThreadDispatch is never used. */ - const bool wm_force_thread_dispatch = false; - - /* 3DSTATE_RASTER::ForceSampleCount is never used. */ - const bool raster_force_sample_count_nonzero = false; - - /* _NEW_BUFFERS: - * 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL && - * 3DSTATE_DEPTH_BUFFER::HIZ Enable - */ - const bool hiz_enabled = depth_irb && brw_renderbuffer_has_hiz(depth_irb); - - /* 3DSTATE_WM::Early Depth/Stencil Control != EDSC_PREPS (2). */ - const bool edsc_not_preps = !wm_prog_data->early_fragment_tests; - - /* 3DSTATE_PS_EXTRA::PixelShaderValid is always true. */ - const bool pixel_shader_valid = true; - - /* !(3DSTATE_WM_HZ_OP::DepthBufferClear || - * 3DSTATE_WM_HZ_OP::DepthBufferResolve || - * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable || - * 3DSTATE_WM_HZ_OP::StencilBufferClear) - * - * HiZ operations are done outside of the normal state upload, so they're - * definitely not happening now. - */ - const bool in_hiz_op = false; - - /* _NEW_DEPTH: - * DEPTH_STENCIL_STATE::DepthTestEnable - */ - const bool depth_test_enabled = depth_irb && ctx->Depth.Test; - - /* _NEW_DEPTH: - * 3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable && - * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE. - */ - const bool depth_writes_enabled = brw_depth_writes_enabled(brw); - - /* _NEW_STENCIL: - * !DEPTH_STENCIL_STATE::Stencil Buffer Write Enable || - * !3DSTATE_DEPTH_BUFFER::Stencil Buffer Enable || - * !3DSTATE_STENCIL_BUFFER::Stencil Buffer Enable - */ - const bool stencil_writes_enabled = brw->stencil_write_enabled; - - /* 3DSTATE_PS_EXTRA::Pixel Shader Computed Depth Mode != PSCDEPTH_OFF */ - const bool ps_computes_depth = - wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF; - - /* BRW_NEW_FS_PROG_DATA: 3DSTATE_PS_EXTRA::PixelShaderKillsPixels - * BRW_NEW_FS_PROG_DATA: 3DSTATE_PS_EXTRA::oMask Present to RenderTarget - * _NEW_MULTISAMPLE: 3DSTATE_PS_BLEND::AlphaToCoverageEnable - * _NEW_COLOR: 3DSTATE_PS_BLEND::AlphaTestEnable - * _NEW_BUFFERS: 3DSTATE_PS_BLEND::AlphaTestEnable - * 3DSTATE_PS_BLEND::AlphaToCoverageEnable - * - * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable is always false. - * 3DSTATE_WM::ForceKillPix != ForceOff is always true. - */ - const bool kill_pixel = - wm_prog_data->uses_kill || - wm_prog_data->uses_omask || - _mesa_is_alpha_test_enabled(ctx) || - _mesa_is_alpha_to_coverage_enabled(ctx); - - /* The big formula in CACHE_MODE_1::NP PMA FIX ENABLE. */ - return !wm_force_thread_dispatch && - !raster_force_sample_count_nonzero && - hiz_enabled && - edsc_not_preps && - pixel_shader_valid && - !in_hiz_op && - depth_test_enabled && - (ps_computes_depth || - (kill_pixel && (depth_writes_enabled || stencil_writes_enabled))); -} - -void -gfx8_write_pma_stall_bits(struct brw_context *brw, uint32_t pma_stall_bits) -{ - /* If we haven't actually changed the value, bail now to avoid unnecessary - * pipeline stalls and register writes. - */ - if (brw->pma_stall_bits == pma_stall_bits) - return; - - brw->pma_stall_bits = pma_stall_bits; - - /* According to the PIPE_CONTROL documentation, software should emit a - * PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set prior - * to the LRI. 
If stencil buffer writes are enabled, then a Render Cache - * Flush is also necessary. - */ - const uint32_t render_cache_flush = - brw->stencil_write_enabled ? PIPE_CONTROL_RENDER_TARGET_FLUSH : 0; - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - render_cache_flush); - - /* CACHE_MODE_1 is a non-privileged register. */ - brw_load_register_imm32(brw, GFX7_CACHE_MODE_1, - GFX8_HIZ_PMA_MASK_BITS | - pma_stall_bits ); - - /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache - * Flush bits is often necessary. We do it regardless because it's easier. - * The render cache flush is also necessary if stencil writes are enabled. - */ - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - render_cache_flush); - -} - -static void -gfx8_emit_pma_stall_workaround(struct brw_context *brw) -{ - const struct intel_device_info *devinfo = &brw->screen->devinfo; - uint32_t bits = 0; - - if (devinfo->ver >= 9) - return; - - if (pma_fix_enable(brw)) - bits |= GFX8_HIZ_NP_PMA_FIX_ENABLE | GFX8_HIZ_NP_EARLY_Z_FAILS_DISABLE; - - gfx8_write_pma_stall_bits(brw, bits); -} - -const struct brw_tracked_state gfx8_pma_fix = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_COLOR | - _NEW_DEPTH | - _NEW_MULTISAMPLE | - _NEW_STENCIL, - .brw = BRW_NEW_BLORP | - BRW_NEW_FS_PROG_DATA, - }, - .emit = gfx8_emit_pma_stall_workaround -}; diff --git a/src/mesa/drivers/dri/i965/gfx8_multisample_state.c b/src/mesa/drivers/dri/i965/gfx8_multisample_state.c deleted file mode 100644 index 5724463..0000000 --- a/src/mesa/drivers/dri/i965/gfx8_multisample_state.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "brw_batch.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_multisample_state.h" - -/** - * 3DSTATE_SAMPLE_PATTERN - */ -void -gfx8_emit_3dstate_sample_pattern(struct brw_context *brw) -{ - BEGIN_BATCH(9); - OUT_BATCH(_3DSTATE_SAMPLE_PATTERN << 16 | (9 - 2)); - - /* 16x MSAA */ - OUT_BATCH(brw_multisample_positions_16x[0]); /* positions 3, 2, 1, 0 */ - OUT_BATCH(brw_multisample_positions_16x[1]); /* positions 7, 6, 5, 4 */ - OUT_BATCH(brw_multisample_positions_16x[2]); /* positions 11, 10, 9, 8 */ - OUT_BATCH(brw_multisample_positions_16x[3]); /* positions 15, 14, 13, 12 */ - - /* 8x MSAA */ - OUT_BATCH(brw_multisample_positions_8x[1]); /* sample positions 7654 */ - OUT_BATCH(brw_multisample_positions_8x[0]); /* sample positions 3210 */ - - /* 4x MSAA */ - OUT_BATCH(brw_multisample_positions_4x); - - /* 1x and 2x MSAA */ - OUT_BATCH(brw_multisample_positions_1x_2x); - ADVANCE_BATCH(); -} diff --git a/src/mesa/drivers/dri/i965/hsw_queryobj.c b/src/mesa/drivers/dri/i965/hsw_queryobj.c deleted file mode 100644 index da195c8..0000000 --- a/src/mesa/drivers/dri/i965/hsw_queryobj.c +++ /dev/null @@ -1,486 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -/** @file hsw_queryobj.c - * - * Support for query buffer objects (GL_ARB_query_buffer_object) on Haswell+. 
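- *
- * Query results are computed on the GPU: the helpers below assemble MI_MATH
- * ALU programs operating on the command streamer GPRs (HSW_CS_GPR(n)), so
- * results can be written into a buffer object without stalling the CPU.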
- */ -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_batch.h" -#include "brw_buffer_objects.h" - -/* - * GPR0 = 80 * GPR0; - */ -static void -mult_gpr0_by_80(struct brw_context *brw) -{ - static const uint32_t maths[] = { - MI_MATH_ALU2(LOAD, SRCA, R0), - MI_MATH_ALU2(LOAD, SRCB, R0), - MI_MATH_ALU0(ADD), - MI_MATH_ALU2(STORE, R1, ACCU), - MI_MATH_ALU2(LOAD, SRCA, R1), - MI_MATH_ALU2(LOAD, SRCB, R1), - MI_MATH_ALU0(ADD), - MI_MATH_ALU2(STORE, R1, ACCU), - MI_MATH_ALU2(LOAD, SRCA, R1), - MI_MATH_ALU2(LOAD, SRCB, R1), - MI_MATH_ALU0(ADD), - MI_MATH_ALU2(STORE, R1, ACCU), - MI_MATH_ALU2(LOAD, SRCA, R1), - MI_MATH_ALU2(LOAD, SRCB, R1), - MI_MATH_ALU0(ADD), - /* GPR1 = 16 * GPR0 */ - MI_MATH_ALU2(STORE, R1, ACCU), - MI_MATH_ALU2(LOAD, SRCA, R1), - MI_MATH_ALU2(LOAD, SRCB, R1), - MI_MATH_ALU0(ADD), - MI_MATH_ALU2(STORE, R2, ACCU), - MI_MATH_ALU2(LOAD, SRCA, R2), - MI_MATH_ALU2(LOAD, SRCB, R2), - MI_MATH_ALU0(ADD), - /* GPR2 = 64 * GPR0 */ - MI_MATH_ALU2(STORE, R2, ACCU), - MI_MATH_ALU2(LOAD, SRCA, R1), - MI_MATH_ALU2(LOAD, SRCB, R2), - MI_MATH_ALU0(ADD), - /* GPR0 = 80 * GPR0 */ - MI_MATH_ALU2(STORE, R0, ACCU), - }; - - BEGIN_BATCH(1 + ARRAY_SIZE(maths)); - OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2)); - - for (int m = 0; m < ARRAY_SIZE(maths); m++) - OUT_BATCH(maths[m]); - - ADVANCE_BATCH(); -} - -/* - * GPR0 = GPR0 & ((1ull << n) - 1); - */ -static void -keep_gpr0_lower_n_bits(struct brw_context *brw, uint32_t n) -{ - static const uint32_t maths[] = { - MI_MATH_ALU2(LOAD, SRCA, R0), - MI_MATH_ALU2(LOAD, SRCB, R1), - MI_MATH_ALU0(AND), - MI_MATH_ALU2(STORE, R0, ACCU), - }; - - assert(n < 64); - brw_load_register_imm64(brw, HSW_CS_GPR(1), (1ull << n) - 1); - - BEGIN_BATCH(1 + ARRAY_SIZE(maths)); - OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2)); - - for (int m = 0; m < ARRAY_SIZE(maths); m++) - OUT_BATCH(maths[m]); - - ADVANCE_BATCH(); -} - -/* - * GPR0 = GPR0 << 30; - */ -static void -shl_gpr0_by_30_bits(struct brw_context *brw) -{ - /* First we mask 34 bits of GPR0 to prevent overflow */ - keep_gpr0_lower_n_bits(brw, 34); - - static const uint32_t shl_maths[] = { - MI_MATH_ALU2(LOAD, SRCA, R0), - MI_MATH_ALU2(LOAD, SRCB, R0), - MI_MATH_ALU0(ADD), - MI_MATH_ALU2(STORE, R0, ACCU), - }; - - const uint32_t outer_count = 5; - const uint32_t inner_count = 6; - STATIC_ASSERT(outer_count * inner_count == 30); - const uint32_t cmd_len = 1 + inner_count * ARRAY_SIZE(shl_maths); - const uint32_t batch_len = cmd_len * outer_count; - - BEGIN_BATCH(batch_len); - - /* We'll emit 5 commands, each shifting GPR0 left by 6 bits, for a total of - * 30 left shifts. - */ - for (int o = 0; o < outer_count; o++) { - /* Submit one MI_MATH to shift left by 6 bits */ - OUT_BATCH(HSW_MI_MATH | (cmd_len - 2)); - for (int i = 0; i < inner_count; i++) - for (int m = 0; m < ARRAY_SIZE(shl_maths); m++) - OUT_BATCH(shl_maths[m]); - } - - ADVANCE_BATCH(); -} - -/* - * GPR0 = GPR0 >> 2; - * - * Note that the upper 30 bits of GPR0 are lost! - */ -static void -shr_gpr0_by_2_bits(struct brw_context *brw) -{ - shl_gpr0_by_30_bits(brw); - brw_load_register_reg(brw, HSW_CS_GPR(0), HSW_CS_GPR(0) + 4); - brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0); -} - -/* - * GPR0 = (GPR0 == 0) ? 
0 : 1;
- */
-static void
-gpr0_to_bool(struct brw_context *brw)
-{
-   static const uint32_t maths[] = {
-      MI_MATH_ALU2(LOAD, SRCA, R0),
-      MI_MATH_ALU1(LOAD0, SRCB),
-      MI_MATH_ALU0(ADD),
-      MI_MATH_ALU2(STOREINV, R0, ZF),
-      MI_MATH_ALU2(LOAD, SRCA, R0),
-      MI_MATH_ALU2(LOAD, SRCB, R1),
-      MI_MATH_ALU0(AND),
-      MI_MATH_ALU2(STORE, R0, ACCU),
-   };
-
-   brw_load_register_imm64(brw, HSW_CS_GPR(1), 1ull);
-
-   BEGIN_BATCH(1 + ARRAY_SIZE(maths));
-   OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2));
-
-   for (int m = 0; m < ARRAY_SIZE(maths); m++)
-      OUT_BATCH(maths[m]);
-
-   ADVANCE_BATCH();
-}
-
-static void
-load_overflow_data_to_cs_gprs(struct brw_context *brw,
-                              struct brw_query_object *query,
-                              int idx)
-{
-   int offset = idx * sizeof(uint64_t) * 4;
-
-   brw_load_register_mem64(brw, HSW_CS_GPR(1), query->bo, offset);
-
-   offset += sizeof(uint64_t);
-   brw_load_register_mem64(brw, HSW_CS_GPR(2), query->bo, offset);
-
-   offset += sizeof(uint64_t);
-   brw_load_register_mem64(brw, HSW_CS_GPR(3), query->bo, offset);
-
-   offset += sizeof(uint64_t);
-   brw_load_register_mem64(brw, HSW_CS_GPR(4), query->bo, offset);
-}
-
-/*
- * R3 = R4 - R3;
- * R1 = R2 - R1;
- * R1 = R3 - R1;
- * R0 = R0 | R1;
- */
-static void
-calc_overflow_for_stream(struct brw_context *brw)
-{
-   static const uint32_t maths[] = {
-      MI_MATH_ALU2(LOAD, SRCA, R4),
-      MI_MATH_ALU2(LOAD, SRCB, R3),
-      MI_MATH_ALU0(SUB),
-      MI_MATH_ALU2(STORE, R3, ACCU),
-      MI_MATH_ALU2(LOAD, SRCA, R2),
-      MI_MATH_ALU2(LOAD, SRCB, R1),
-      MI_MATH_ALU0(SUB),
-      MI_MATH_ALU2(STORE, R1, ACCU),
-      MI_MATH_ALU2(LOAD, SRCA, R3),
-      MI_MATH_ALU2(LOAD, SRCB, R1),
-      MI_MATH_ALU0(SUB),
-      MI_MATH_ALU2(STORE, R1, ACCU),
-      MI_MATH_ALU2(LOAD, SRCA, R1),
-      MI_MATH_ALU2(LOAD, SRCB, R0),
-      MI_MATH_ALU0(OR),
-      MI_MATH_ALU2(STORE, R0, ACCU),
-   };
-
-   BEGIN_BATCH(1 + ARRAY_SIZE(maths));
-   OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2));
-
-   for (int m = 0; m < ARRAY_SIZE(maths); m++)
-      OUT_BATCH(maths[m]);
-
-   ADVANCE_BATCH();
-}
-
-static void
-calc_overflow_to_gpr0(struct brw_context *brw, struct brw_query_object *query,
-                      int count)
-{
-   brw_load_register_imm64(brw, HSW_CS_GPR(0), 0ull);
-
-   for (int i = 0; i < count; i++) {
-      load_overflow_data_to_cs_gprs(brw, query, i);
-      calc_overflow_for_stream(brw);
-   }
-}
-
-/*
- * Take a query and calculate whether there was overflow during transform
- * feedback. Store the result in the gpr0 register.
- */
-void
-hsw_overflow_result_to_gpr0(struct brw_context *brw,
-                            struct brw_query_object *query,
-                            int count)
-{
-   calc_overflow_to_gpr0(brw, query, count);
-   gpr0_to_bool(brw);
-}
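The MI_MATH helpers above work around a command streamer ALU that can only load, add, and do bitwise ops, with no multiplier or shifter. A plain C model of the three tricks (my illustration, not driver code):

```c
#include <stdint.h>

/* mult_gpr0_by_80: 16x via four ADD-doublings, 64x via two more,
 * then 80x = 16x + 64x. */
static uint64_t
mult_by_80(uint64_t gpr0)
{
   uint64_t x16 = gpr0 << 4;
   uint64_t x64 = x16 << 2;
   return x16 + x64;
}

/* shr_gpr0_by_2_bits: mask to 34 bits so 30 doublings cannot overflow,
 * "shift" left by 30 with ADDs, then keep only the upper dword. */
static uint64_t
shr_by_2(uint64_t gpr0)
{
   uint64_t v = (gpr0 & ((1ull << 34) - 1)) << 30;
   return v >> 32;
}

/* calc_overflow_for_stream: with (r1, r2) and (r3, r4) holding begin/end
 * snapshots of the two counters, OR the difference of the deltas into r0;
 * any stream where the deltas disagree leaves r0 nonzero. */
static uint64_t
stream_overflow(uint64_t r0, uint64_t r1, uint64_t r2,
                uint64_t r3, uint64_t r4)
{
   return r0 | ((r4 - r3) - (r2 - r1));
}
```

gpr0_to_bool then collapses the accumulated value to 0 or 1, which is exactly the Boolean that the overflow queries must return.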
-
-static void
-hsw_result_to_gpr0(struct gl_context *ctx, struct brw_query_object *query,
-                   struct gl_buffer_object *buf, intptr_t offset,
-                   GLenum pname, GLenum ptype)
-{
-   struct brw_context *brw = brw_context(ctx);
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   assert(query->bo);
-   assert(pname != GL_QUERY_TARGET);
-
-   if (pname == GL_QUERY_RESULT_AVAILABLE) {
-      /* The query result availability is stored at offset
-       * 2 * sizeof(uint64_t) of the buffer.
-       */
-      brw_load_register_mem64(brw,
-                              HSW_CS_GPR(0),
-                              query->bo,
-                              2 * sizeof(uint64_t));
-      return;
-   }
-
-   if (pname == GL_QUERY_RESULT) {
-      /* Since GL_QUERY_RESULT_NO_WAIT wasn't used, they want us to stall to
-       * make sure the query is available.
-       */
-      brw_emit_pipe_control_flush(brw,
-                                  PIPE_CONTROL_CS_STALL |
-                                  PIPE_CONTROL_STALL_AT_SCOREBOARD);
-   }
-
-   if (query->Base.Target == GL_TIMESTAMP) {
-      brw_load_register_mem64(brw,
-                              HSW_CS_GPR(0),
-                              query->bo,
-                              0 * sizeof(uint64_t));
-   } else if (query->Base.Target == GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB
-              || query->Base.Target == GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB) {
-      /* Don't do anything in advance here, since the math for this is a little
-       * more complex.
-       */
-   } else {
-      brw_load_register_mem64(brw,
-                              HSW_CS_GPR(1),
-                              query->bo,
-                              0 * sizeof(uint64_t));
-      brw_load_register_mem64(brw,
-                              HSW_CS_GPR(2),
-                              query->bo,
-                              1 * sizeof(uint64_t));
-
-      BEGIN_BATCH(5);
-      OUT_BATCH(HSW_MI_MATH | (5 - 2));
-
-      OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R2));
-      OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
-      OUT_BATCH(MI_MATH_ALU0(SUB));
-      OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
-
-      ADVANCE_BATCH();
-   }
-
-   switch (query->Base.Target) {
-   case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
-      /* Implement the "WaDividePSInvocationCountBy4:HSW,BDW" workaround:
-       * "Invocation counter is 4 times actual. WA: SW to divide HW reported
-       * PS Invocations value by 4."
-       *
-       * Prior to Haswell, invocations were counted by the WM in units of
-       * subspans (2x2 units), so the CS multiplied that value by 4 to get
-       * the correct count. On Haswell the logic moved and the hardware began
-       * reporting the true number of pixel shader invocations, but the
-       * multiply by 4 was never removed.
-       */
-      if (devinfo->ver == 8 || devinfo->platform == INTEL_PLATFORM_HSW)
-         shr_gpr0_by_2_bits(brw);
-      break;
-   case GL_TIME_ELAPSED:
-   case GL_TIMESTAMP:
-      mult_gpr0_by_80(brw);
-      if (query->Base.Target == GL_TIMESTAMP) {
-         keep_gpr0_lower_n_bits(brw, 36);
-      }
-      break;
-   case GL_ANY_SAMPLES_PASSED:
-   case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
-      gpr0_to_bool(brw);
-      break;
-   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
-      hsw_overflow_result_to_gpr0(brw, query, 1);
-      break;
-   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
-      hsw_overflow_result_to_gpr0(brw, query, MAX_VERTEX_STREAMS);
-      break;
-   }
-}
-
-/*
- * Store immediate data into the user buffer using the requested size.
- */
-static void
-store_query_result_imm(struct brw_context *brw, struct brw_bo *bo,
-                       uint32_t offset, GLenum ptype, uint64_t imm)
-{
-   switch (ptype) {
-   case GL_INT:
-   case GL_UNSIGNED_INT:
-      brw_store_data_imm32(brw, bo, offset, imm);
-      break;
-   case GL_INT64_ARB:
-   case GL_UNSIGNED_INT64_ARB:
-      brw_store_data_imm64(brw, bo, offset, imm);
-      break;
-   default:
-      unreachable("Unexpected result type");
-   }
-}
-
-static void
-set_predicate(struct brw_context *brw, struct brw_bo *query_bo)
-{
-   brw_load_register_imm64(brw, MI_PREDICATE_SRC1, 0ull);
-
-   /* Load query availability into SRC0 */
-   brw_load_register_mem64(brw, MI_PREDICATE_SRC0, query_bo,
-                           2 * sizeof(uint64_t));
-
-   /* predicate = !(query_availability == 0); */
-   BEGIN_BATCH(1);
-   OUT_BATCH(GFX7_MI_PREDICATE |
-             MI_PREDICATE_LOADOP_LOADINV |
-             MI_PREDICATE_COMBINEOP_SET |
-             MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
-   ADVANCE_BATCH();
-}
-
-/*
- * Store data from the register into the user buffer using the requested size.
- * The write also enables the predication to prevent writing the result if the
- * query has not finished yet.
- */
-static void
-store_query_result_reg(struct brw_context *brw, struct brw_bo *bo,
-                       uint32_t offset, GLenum ptype, uint32_t reg,
-                       const bool pipelined)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-   uint32_t cmd_size = devinfo->ver >= 8 ? 4 : 3;
-   uint32_t dwords = (ptype == GL_INT || ptype == GL_UNSIGNED_INT) ? 1 : 2;
-   assert(devinfo->ver >= 6);
-
-   BEGIN_BATCH(dwords * cmd_size);
-   for (int i = 0; i < dwords; i++) {
-      OUT_BATCH(MI_STORE_REGISTER_MEM |
-                (pipelined ? MI_STORE_REGISTER_MEM_PREDICATE : 0) |
-                (cmd_size - 2));
-      OUT_BATCH(reg + 4 * i);
-      if (devinfo->ver >= 8) {
-         OUT_RELOC64(bo, RELOC_WRITE, offset + 4 * i);
-      } else {
-         OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset + 4 * i);
-      }
-   }
-   ADVANCE_BATCH();
-}
-
-static void
-hsw_store_query_result(struct gl_context *ctx, struct gl_query_object *q,
-                       struct gl_buffer_object *buf, intptr_t offset,
-                       GLenum pname, GLenum ptype)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_query_object *query = (struct brw_query_object *)q;
-   struct brw_buffer_object *bo = brw_buffer_object(buf);
-   const bool pipelined = brw_is_query_pipelined(query);
-
-   if (pname == GL_QUERY_TARGET) {
-      store_query_result_imm(brw, bo->buffer, offset, ptype,
-                             query->Base.Target);
-      return;
-   } else if (pname == GL_QUERY_RESULT_AVAILABLE && !pipelined) {
-      store_query_result_imm(brw, bo->buffer, offset, ptype, 1ull);
-   } else if (query->bo) {
-      /* The query bo is still around. Therefore, we:
-       *
-       *    1. Compute the current result in GPR0
-       *    2. Set the command streamer predicate based on query availability
-       *    3. (With predication) Write GPR0 to the requested buffer
-       */
-      hsw_result_to_gpr0(ctx, query, buf, offset, pname, ptype);
-      if (pipelined)
-         set_predicate(brw, query->bo);
-      store_query_result_reg(brw, bo->buffer, offset, ptype, HSW_CS_GPR(0),
-                             pipelined);
-   } else {
-      /* The query bo is gone, so the query must have been processed into
-       * client memory. In this case we can fill the buffer location with the
-       * requested data using MI_STORE_DATA_IMM.
-       */
-      switch (pname) {
-      case GL_QUERY_RESULT_AVAILABLE:
-         store_query_result_imm(brw, bo->buffer, offset, ptype, 1ull);
-         break;
-      case GL_QUERY_RESULT_NO_WAIT:
-      case GL_QUERY_RESULT:
-         store_query_result_imm(brw, bo->buffer, offset, ptype,
-                                q->Result);
-         break;
-      default:
-         unreachable("Unexpected result type");
-      }
-   }
-
-}
-
-/* Initialize hsw+-specific query object functions. */
-void hsw_init_queryobj_functions(struct dd_function_table *functions)
-{
-   gfx6_init_queryobj_functions(functions);
-   functions->StoreQueryResult = hsw_store_query_result;
-}
diff --git a/src/mesa/drivers/dri/i965/hsw_sol.c b/src/mesa/drivers/dri/i965/hsw_sol.c
deleted file mode 100644
index 8d801e5..0000000
--- a/src/mesa/drivers/dri/i965/hsw_sol.c
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * An implementation of the transform feedback driver hooks for Haswell - * and later hardware. This uses MI_MATH to compute the number of vertices - * written (for use by DrawTransformFeedback()) without any CPU<->GPU - * synchronization which could stall. - */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_batch.h" -#include "brw_buffer_objects.h" -#include "main/transformfeedback.h" - -/** - * We store several values in obj->prim_count_bo: - * - * [4x 32-bit values]: Final Number of Vertices Written - * [4x 32-bit values]: Tally of Primitives Written So Far - * [4x 64-bit values]: Starting SO_NUM_PRIMS_WRITTEN Counter Snapshots - * - * The first set of values is used by DrawTransformFeedback(), which - * copies one of them into the 3DPRIM_VERTEX_COUNT register and performs - * an indirect draw. The other values are just temporary storage. - */ - -#define TALLY_OFFSET (BRW_MAX_XFB_STREAMS * sizeof(uint32_t)) -#define START_OFFSET (TALLY_OFFSET * 2) - -/** - * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values) - * to prim_count_bo. - */ -static void -save_prim_start_values(struct brw_context *brw, - struct brw_transform_feedback_object *obj) -{ - /* Flush any drawing so that the counters have the right values. */ - brw_emit_mi_flush(brw); - - /* Emit MI_STORE_REGISTER_MEM commands to write the values. */ - for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { - brw_store_register_mem64(brw, obj->prim_count_bo, - GFX7_SO_NUM_PRIMS_WRITTEN(i), - START_OFFSET + i * sizeof(uint64_t)); - } -} - -/** - * Compute the number of primitives written during our most recent - * transform feedback activity (the current SO_NUM_PRIMS_WRITTEN value - * minus the stashed "start" value), and add it to our running tally. - * - * If \p finalize is true, also compute the number of vertices written - * (by multiplying by the number of vertices per primitive), and store - * that to the "final" location. - * - * Otherwise, just overwrite the old tally with the new one. - */ -static void -tally_prims_written(struct brw_context *brw, - struct brw_transform_feedback_object *obj, - bool finalize) -{ - /* Flush any drawing so that the counters have the right values. 
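Because prim_count_bo is addressed purely through byte offsets, a struct view of the layout described above may help. This is an illustration only; it assumes BRW_MAX_XFB_STREAMS is 4, which is the value the offsets imply (TALLY_OFFSET = 16, START_OFFSET = 32):

```c
#include <stdint.h>

#define BRW_MAX_XFB_STREAMS 4 /* assumed from the offsets above */

/* Struct view of obj->prim_count_bo (illustrative, not driver code). */
struct prim_count_layout {
   uint32_t final_vertex_count[BRW_MAX_XFB_STREAMS];  /* offset  0 */
   uint32_t prims_written_tally[BRW_MAX_XFB_STREAMS]; /* offset 16 */
   uint64_t start_snapshot[BRW_MAX_XFB_STREAMS];      /* offset 32 */
};
```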
*/ - brw_emit_mi_flush(brw); - - for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { - /* GPR0 = Tally */ - brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0); - brw_load_register_mem(brw, HSW_CS_GPR(0), obj->prim_count_bo, - TALLY_OFFSET + i * sizeof(uint32_t)); - if (!obj->base.Paused) { - /* GPR1 = Start Snapshot */ - brw_load_register_mem64(brw, HSW_CS_GPR(1), obj->prim_count_bo, - START_OFFSET + i * sizeof(uint64_t)); - /* GPR2 = Ending Snapshot */ - brw_load_register_reg64(brw, HSW_CS_GPR(2), - GFX7_SO_NUM_PRIMS_WRITTEN(i)); - - BEGIN_BATCH(9); - OUT_BATCH(HSW_MI_MATH | (9 - 2)); - /* GPR1 = GPR2 (End) - GPR1 (Start) */ - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R2)); - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1)); - OUT_BATCH(MI_MATH_ALU0(SUB)); - OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU)); - /* GPR0 = GPR0 (Tally) + GPR1 (Diff) */ - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1)); - OUT_BATCH(MI_MATH_ALU0(ADD)); - OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU)); - ADVANCE_BATCH(); - } - - if (!finalize) { - /* Write back the new tally */ - brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0), - TALLY_OFFSET + i * sizeof(uint32_t)); - } else { - /* Convert the number of primitives to the number of vertices. */ - if (obj->primitive_mode == GL_LINES) { - /* Double R0 (R0 = R0 + R0) */ - BEGIN_BATCH(5); - OUT_BATCH(HSW_MI_MATH | (5 - 2)); - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0)); - OUT_BATCH(MI_MATH_ALU0(ADD)); - OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU)); - ADVANCE_BATCH(); - } else if (obj->primitive_mode == GL_TRIANGLES) { - /* Triple R0 (R1 = R0 + R0, R0 = R0 + R1) */ - BEGIN_BATCH(9); - OUT_BATCH(HSW_MI_MATH | (9 - 2)); - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0)); - OUT_BATCH(MI_MATH_ALU0(ADD)); - OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU)); - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); - OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1)); - OUT_BATCH(MI_MATH_ALU0(ADD)); - OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU)); - ADVANCE_BATCH(); - } - /* Store it to the final result */ - brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0), - i * sizeof(uint32_t)); - } - } -} - -/** - * BeginTransformFeedback() driver hook. - */ -void -hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - brw_obj->primitive_mode = mode; - - /* Reset the SO buffer offsets to 0. */ - if (devinfo->ver >= 8) { - brw_obj->zero_offsets = true; - } else { - BEGIN_BATCH(1 + 2 * BRW_MAX_XFB_STREAMS); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (1 + 2 * BRW_MAX_XFB_STREAMS - 2)); - for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { - OUT_BATCH(GFX7_SO_WRITE_OFFSET(i)); - OUT_BATCH(0); - } - ADVANCE_BATCH(); - } - - /* Zero out the initial tallies */ - brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET, 0ull); - brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET + 8, 0ull); - - /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */ - save_prim_start_values(brw, brw_obj); -} - -/** - * PauseTransformFeedback() driver hook. 
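Summarizing the MI_MATH in tally_prims_written as ordinary arithmetic (again an illustration, not driver code):

```c
#include <stdbool.h>
#include <stdint.h>

/* One stream's update: GPR0 = tally, GPR1 = start snapshot,
 * GPR2 = current SO_NUM_PRIMS_WRITTEN. Paused objects skip the delta. */
static uint32_t
update_tally(uint32_t tally, uint64_t start, uint64_t end, bool paused)
{
   if (!paused)
      tally += (uint32_t)(end - start); /* R0 += R2 - R1 */
   return tally;
}

/* Finalize: primitives to vertices. Lines double the count (R0 += R0),
 * triangles triple it (R1 = R0 + R0; R0 += R1), points pass through. */
static uint32_t
prims_to_vertices(uint32_t prims, unsigned verts_per_prim)
{
   return prims * verts_per_prim; /* 1, 2, or 3 */
}
```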
- */ -void -hsw_pause_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->platform == INTEL_PLATFORM_HSW) { - /* Flush any drawing so that the counters have the right values. */ - brw_emit_mi_flush(brw); - - /* Save the SOL buffer offset register values. */ - for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { - BEGIN_BATCH(3); - OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); - OUT_BATCH(GFX7_SO_WRITE_OFFSET(i)); - OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t)); - ADVANCE_BATCH(); - } - } - - /* Add any primitives written to our tally */ - tally_prims_written(brw, brw_obj, false); -} - -/** - * ResumeTransformFeedback() driver hook. - */ -void -hsw_resume_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - const struct intel_device_info *devinfo = &brw->screen->devinfo; - - if (devinfo->platform == INTEL_PLATFORM_HSW) { - /* Reload the SOL buffer offset registers. */ - for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { - BEGIN_BATCH(3); - OUT_BATCH(GFX7_MI_LOAD_REGISTER_MEM | (3 - 2)); - OUT_BATCH(GFX7_SO_WRITE_OFFSET(i)); - OUT_RELOC(brw_obj->offset_bo, RELOC_WRITE, i * sizeof(uint32_t)); - ADVANCE_BATCH(); - } - } - - /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */ - save_prim_start_values(brw, brw_obj); -} - -/** - * EndTransformFeedback() driver hook. - */ -void -hsw_end_transform_feedback(struct gl_context *ctx, - struct gl_transform_feedback_object *obj) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_transform_feedback_object *brw_obj = - (struct brw_transform_feedback_object *) obj; - - /* Add any primitives written to our tally, convert it from the number - * of primitives written to the number of vertices written, and store - * it in the "final" location in the buffer which DrawTransformFeedback() - * will use as the vertex count. - */ - tally_prims_written(brw, brw_obj, true); -} diff --git a/src/mesa/drivers/dri/i965/libdrm_macros.h b/src/mesa/drivers/dri/i965/libdrm_macros.h deleted file mode 100644 index 2cb76d5..0000000 --- a/src/mesa/drivers/dri/i965/libdrm_macros.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright © 2014 NVIDIA Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef LIBDRM_LIBDRM_H
-#define LIBDRM_LIBDRM_H
-
-#include "util/macros.h"
-
-
-#include <sys/mman.h>
-
-#if defined(ANDROID) && !defined(__LP64__)
-/* 32-bit needs mmap64 for 64-bit offsets */
-# define drm_mmap(addr, length, prot, flags, fd, offset) \
-       mmap64(addr, length, prot, flags, fd, offset)
-
-# define drm_munmap(addr, length) \
-       munmap(addr, length)
-
-#else
-
-/* assume large file support exists */
-# define drm_mmap(addr, length, prot, flags, fd, offset) \
-       mmap(addr, length, prot, flags, fd, offset)
-
-
-static inline int drm_munmap(void *addr, size_t length)
-{
-   /* Copied from configure code generated by AC_SYS_LARGEFILE */
-#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + \
-                     (((off_t) 1 << 31) << 31))
-   STATIC_ASSERT(LARGE_OFF_T % 2147483629 == 721 &&
-                 LARGE_OFF_T % 2147483647 == 1);
-#undef LARGE_OFF_T
-
-   return munmap(addr, length);
-}
-#endif
-
-#endif
diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build
deleted file mode 100644
index 25bcd1a..0000000
--- a/src/mesa/drivers/dri/i965/meson.build
+++ /dev/null
@@ -1,166 +0,0 @@
-# Copyright © 2017 Intel Corporation
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
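One aside on the libdrm_macros.h removal above: the drm_mmap indirection exists because 32-bit Android has a 32-bit off_t, so a buffer offset at or above 2 GiB would be truncated, and mmap64 must be used instead. A usage sketch (the wrapper and flag choices are illustrative, not the driver's):

```c
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

/* Map a buffer object at a 64-bit offset. On 32-bit Android, plain mmap
 * would truncate offsets that do not fit in a 32-bit off_t. */
static void *
map_bo(int fd, uint64_t offset, size_t length)
{
#if defined(ANDROID) && !defined(__LP64__)
   return mmap64(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED,
                 fd, (off64_t)offset);
#else
   return mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED,
               fd, (off_t)offset);
#endif
}
```

The drm_munmap half of the pair exists mainly to carry the AC_SYS_LARGEFILE static assertion on non-Android builds; the actual munmap call is the same either way.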
- -files_i965 = files( - 'brw_binding_tables.c', - 'brw_blorp.c', - 'brw_blorp.h', - 'brw_bufmgr.c', - 'brw_bufmgr.h', - 'brw_clear.c', - 'brw_clip.c', - 'brw_compute.c', - 'brw_conditional_render.c', - 'brw_context.c', - 'brw_context.h', - 'brw_cs.c', - 'brw_cs.h', - 'brw_curbe.c', - 'brw_defines.h', - 'brw_disk_cache.c', - 'brw_draw.c', - 'brw_draw.h', - 'brw_draw_upload.c', - 'brw_ff_gs.c', - 'brw_ff_gs.h', - 'brw_formatquery.c', - 'brw_generate_mipmap.c', - 'brw_gs.c', - 'brw_gs.h', - 'brw_gs_surface_state.c', - 'brw_link.cpp', - 'brw_meta_util.c', - 'brw_meta_util.h', - 'brw_misc_state.c', - 'brw_multisample_state.h', - 'brw_nir_uniforms.cpp', - 'brw_object_purgeable.c', - 'brw_pipe_control.c', - 'brw_performance_query.c', - 'brw_program.c', - 'brw_program.h', - 'brw_program_binary.c', - 'brw_program_cache.c', - 'brw_primitive_restart.c', - 'brw_queryobj.c', - 'brw_reset.c', - 'brw_sf.c', - 'brw_state.h', - 'brw_state_upload.c', - 'brw_structs.h', - 'brw_surface_formats.c', - 'brw_sync.c', - 'brw_tcs.c', - 'brw_tcs_surface_state.c', - 'brw_tes.c', - 'brw_tes_surface_state.c', - 'brw_urb.c', - 'brw_util.c', - 'brw_util.h', - 'brw_vs.c', - 'brw_vs.h', - 'brw_vs_surface_state.c', - 'brw_wm.c', - 'brw_wm.h', - 'brw_wm_surface_state.c', - 'gfx4_blorp_exec.h', - 'gfx6_clip_state.c', - 'gfx6_constant_state.c', - 'gfx6_multisample_state.c', - 'gfx6_queryobj.c', - 'gfx6_sampler_state.c', - 'gfx6_sol.c', - 'gfx6_urb.c', - 'gfx7_l3_state.c', - 'gfx7_sol_state.c', - 'gfx7_urb.c', - 'gfx8_depth_state.c', - 'gfx8_multisample_state.c', - 'hsw_queryobj.c', - 'hsw_sol.c', - 'brw_batch.c', - 'brw_batch.h', - 'brw_blit.c', - 'brw_blit.h', - 'brw_buffer_objects.c', - 'brw_buffer_objects.h', - 'brw_buffers.c', - 'brw_buffers.h', - 'brw_copy_image.c', - 'brw_extensions.c', - 'brw_fbo.c', - 'brw_fbo.h', - 'brw_image.h', - 'brw_mipmap_tree.c', - 'brw_mipmap_tree.h', - 'brw_pixel_bitmap.c', - 'brw_pixel.c', - 'brw_pixel_copy.c', - 'brw_pixel_draw.c', - 'brw_pixel.h', - 'brw_pixel_read.c', - 'brw_screen.c', - 'brw_screen.h', - 'brw_state.c', - 'brw_tex.c', - 'brw_tex_copy.c', - 'brw_tex.h', - 'brw_tex_image.c', - 'brw_tex_obj.h', - 'brw_tex_validate.c', - 'brw_upload.c', - 'libdrm_macros.h', -) - -i965_per_hw_ver_libs = [] -foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '110'] - i965_per_hw_ver_libs += static_library( - 'i965_per_hw_ver@0@'.format(v), - ['genX_blorp_exec.c', 'genX_boilerplate.h', 'genX_pipe_control.c', - 'genX_state_upload.c', gen_xml_pack], - include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_dri_common], - c_args : [ - no_override_init_args, c_sse2_args, - '-DGFX_VERx10=@0@'.format(v), - ], - gnu_symbol_visibility : 'hidden', - dependencies : [dep_libdrm, idep_nir_headers, idep_mesautil], - ) -endforeach - - -libi965 = static_library( - 'i965', - [files_i965, ir_expression_operation_h], - include_directories : [ - inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_intel, inc_dri_common, - ], - c_args : [no_override_init_args, c_sse2_args], - cpp_args : [c_sse2_args], - gnu_symbol_visibility : 'hidden', - link_with : [ - i965_per_hw_ver_libs, libintel_dev, libisl, libintel_compiler, libblorp, - libintel_perf - ], - dependencies : [ - dep_libdrm, dep_valgrind, idep_libintel_common, idep_nir_headers, idep_genxml, - idep_xmlconfig, - ], -) diff --git a/src/mesa/drivers/dri/meson.build b/src/mesa/drivers/dri/meson.build index f6efd96..cea38dc 100644 --- a/src/mesa/drivers/dri/meson.build +++ b/src/mesa/drivers/dri/meson.build @@ 
-22,11 +22,6 @@ subdir('common') _dri_drivers = [] _dri_link = [] -if with_dri_i965 - subdir('i965') - _dri_drivers += libi965 - _dri_link += 'i965_dri.so' -endif if _dri_drivers != [] libmesa_dri_drivers = shared_library( -- 2.7.4